source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/OAIPMH.java@ 32829

Last change on this file since 32829 was 32829, checked in by ak19, 5 years ago

Related to previous commit: coll_db may not be instantiated if oaiserver servlet has not been visited and only library servlet has been visited. So cleanUp() shouldn't try closing the coll_db handle if it's still null.

File size: 37.0 KB
Line 
1/*
2 * OAIPMH.java
3 * Copyright (C) 2010 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.OAIXML;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.GSFile;
27import org.greenstone.gsdl3.util.XMLConverter;
28
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43import java.util.Date;
44import java.util.HashMap;
45import java.util.HashSet;
46import java.util.Map.Entry;
47
48import org.apache.log4j.Logger;
49
50/** Implements the oai metadata retrieval service for GS3 collections.
51 * Dig into each collection's database and retrieve the metadata
52 *
53 */
54
55public class OAIPMH extends ServiceRack {
56
57 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.OAIPMH.class.getName()); // log4j logger shared by all instances of this class
58
59 protected SimpleCollectionDatabase coll_db = null; // handle to the collection database; created and opened in configureOAI()
60 protected SimpleCollectionDatabase oaiinf_db = null; // handle to the oai-inf database; configureOAI() resets it to null if the db file is missing or cannot be opened
61
62 protected String site_name = ""; // set from the router's site name in configure()
63 protected String coll_name = ""; // set from cluster_name in configure()
64
65 // set this up during configure
66 protected Element list_sets_response = null; // built by configureSetInfo(), returned as-is by processListSets()
67
68 protected Element meta_formats_definition = null; // holds the per-collection copies of the metadataFormat definitions; filled in configureOAI()
69 protected HashMap<String, HashSet<String>> format_elements_map = null; // metadata prefix -> set of collection metadata names mapped by that format
70 protected HashMap<String, Element> format_response_map = null; // metadata prefix -> short metadataFormat element; also used to test whether a prefix is supported
71 protected HashMap<String, Element> format_meta_elem_map = null; // metadata prefix -> prefix element that createMetadataElement() imports as the metadata container
72
73 protected String index_stem = ""; // read from buildConfig.xml in configure(); defaults to cluster_name
74 protected String infodb_type = ""; // read from buildConfig.xml in configure(); defaults to "gdbm"
75
76 /** constructor */
77 public OAIPMH() {
78
79 }
80
81 public void cleanUp() {
82 super.cleanUp();//??
83
84 if(this.coll_db != null) {
85 this.coll_db.closeDatabase();
86 this.coll_db = null;
87 }
88 if (this.oaiinf_db != null){
89 this.oaiinf_db.closeDatabase();
90 }
91 }
92
93 /** configure this service
94 info is the OAIPMH service rack from collectionConfig.xml, and
95 extra_info is buildConfig.xml */
96 public boolean configure(Element info, Element extra_info) {
97 if (!super.configure(info, extra_info)){
98 logger.info("Configuring ServiceRack.java returns false.");
99 return false;
100 }
101
102 //get the names from ServiceRack.java
103 this.site_name = this.router.getSiteName();
104 this.coll_name = this.cluster_name;
105
106 logger.info("Configuring OAIPMH...");
107
108 this.config_info = info;
109
110 // the index stem is either specified in the buildConfig.xml file (extra_info) or uses the collection name
111 Element metadata_list = (Element) GSXML.getChildByTagName(extra_info, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
112
113 if (metadata_list != null) {
114
115 Element index_stem_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "indexStem");
116
117 if (index_stem_elem != null) {
118 this.index_stem = GSXML.getNodeText(index_stem_elem);
119 }
120
121 Element infodb_type_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "infodbType");
122 if (infodb_type_elem != null) {
123 this.infodb_type = GSXML.getNodeText(infodb_type_elem);
124 }
125
126 }
127
128 if (index_stem == null || index_stem.equals("")) {
129 this.index_stem = this.cluster_name; // index_stem is the name of the db in indext/text, it is <colname>.<db>
130 }
131 if (infodb_type == null || infodb_type.equals("")) {
132 this.infodb_type = "gdbm"; // the default
133 }
134
135 // DB OPENING STUFF MOVED TO configureOAI(), because OAIPMH.configure() is called by the regular MessageRouter when this activates collections for the regular "library" servlet
136 // whereas OAIPMH.configureOAI() is only called by OAIMessageRouter when it activates collections for the "oaiserver" servlet (after OAIMessageRouter calls regular configure() first)
137 // We don't want the DBs opened twice: once by MessageRouter's call to OAIPMH.configure() and once by OAIMessageRouter calling OAIPMH.configure().
138
139 // work out what sets this collection has. Will usually contain the collection itself, optional super collection, and maybe subcolls if appropriate classifiers are present.
140 configureSetInfo();
141 // the short_service_info is used by the message router to find the method names,
142
143 Element list_records = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
144 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
145 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
146 this.short_service_info.appendChild(list_records);
147
148 Element list_identifiers = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
149 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
150 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
151 this.short_service_info.appendChild(list_identifiers);
152
153 Element list_sets = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
154 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
155 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
156 this.short_service_info.appendChild(list_sets);
157
158 Element list_metadata_formats = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
159 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
160 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
161 this.short_service_info.appendChild(list_metadata_formats);
162
163 Element get_record = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
164 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
165 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
166 this.short_service_info.appendChild(get_record);
167
168 return true;
169 }
170
171 public boolean configureOAI(Element oai_config_elem) {
172 this.meta_formats_definition = this.desc_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
173 this.format_response_map = new HashMap<String, Element>();
174 this.format_elements_map = new HashMap<String, HashSet<String>>();
175 this.format_meta_elem_map = new HashMap<String, Element>();
176
177 // for now, all we want is the metadata prefix description and the mapping list
178 Element main_lmf_elem = (Element) GSXML.getChildByTagName(oai_config_elem, OAIXML.LIST_METADATA_FORMATS);
179 if (main_lmf_elem == null) {
180 logger.error("No listMetadataFormats element found in OAIConfig.xml");
181 return false;
182 }
183 NodeList meta_formats_list = this.config_info.getElementsByTagName(OAIXML.METADATA_FORMAT);
184 if (meta_formats_list.getLength() == 0) {
185 logger.error("no metadataFormat elements found in OAIPMH serviceRack element");
186 return false;
187 }
188
189 boolean found_meta_format = false;
190 for(int i=0; i<meta_formats_list.getLength(); i++) {
191 Element mf = (Element) meta_formats_list.item(i);
192 String prefix = mf.getAttribute(OAIXML.METADATA_PREFIX);
193 if (prefix.equals("")) {
194 logger.error("metadataFormat element had no metadataPrefix attribute");
195 continue;
196 }
197 // get the right format from OAICOnfig
198 Element meta_format = findNamedMetadataFormat(main_lmf_elem, prefix);
199 if (meta_format == null) {
200 logger.error("Couldn't find metadataFormat named "+prefix+" in OAIConfig.xml");
201 continue;
202 }
203
204 // copy the format definition into our stored Element
205 Element collection_version_format = (Element) this.desc_doc.importNode(meta_format, true);
206 collection_version_format.setAttribute(GSXML.NAME_ATT, prefix); // for convenience
207 this.meta_formats_definition.appendChild(collection_version_format);
208 // set up the response element for this format
209 format_response_map.put(prefix, OAIXML.getMetadataFormatShort(this.desc_doc, collection_version_format));
210 // add in collection specific mappings
211 addCollectionMappings(collection_version_format, mf);
212 // now set up a list of all collection elements for reverse lookup of the mapping
213 format_elements_map.put(prefix, getAllCollectionElements(collection_version_format));
214 format_meta_elem_map.put(prefix, OAIXML.getMetadataPrefixElement(this.desc_doc, prefix, collection_version_format));
215
216 } // end for
217
218 // Open the coll db and oai-inf db databases and store handles to them
219 coll_db = new SimpleCollectionDatabase(infodb_type);
220 if (!coll_db.databaseOK()) {
221 logger.error("Couldn't create the collection database of type "+infodb_type);
222 return false;
223 }
224
225 oaiinf_db = new SimpleCollectionDatabase(infodb_type);
226 if (!oaiinf_db.databaseOK()) {
227 logger.error("Couldn't create the oai-inf database of type "+infodb_type);
228 return false;
229 }
230
231
232 // Open databases for querying
233 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, index_stem, infodb_type);
234 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
235 logger.error("Could not open collection database!");
236 return false;
237 }
238 // the oaiinf_db is called oai-inf.<infodb_type_extension>
239 String oaiinf_db_file = GSFile.OAIInfoDatabaseFile(this.site_home, this.cluster_name, "oai-inf", infodb_type);
240 File oaiinfFile = new File(oaiinf_db_file);
241
242 if(!oaiinfFile.exists()) {
243 logger.warn("oai-inf database for collection + " + this.cluster_name + " does not exist.");
244 oaiinf_db = null;
245 } else if (!this.oaiinf_db.openDatabase(oaiinf_db_file, SimpleCollectionDatabase.READ)) {
246 logger.warn("Could not open oai-inf database for collection + " + this.cluster_name + "!");
247 oaiinf_db = null;
248 }
249
250 return true;
251 }
252
253 /**
254 * @return the associated OAICollection's OAI_EARLIEST_TIMESTAMP_OID record's
255 * OAI_INF_TIMESTAMP field from the collection's oai-inf.db IN MILLISECONDS
256 */
257 public long getEarliestTimestamp() {
258 long timestamp = -1;
259
260 DBInfo oai_info = null;
261 if(oaiinf_db != null) {
262 // get internal record containing the earliest timestamp of the collection
263 oai_info = this.oaiinf_db.getInfo(OAIXML.OAI_EARLIEST_TIMESTAMP_OID);
264 if (oai_info == null) {
265 logger.warn("Can't get collection " + this.cluster_name + "'s earliest timestamp from oai-inf db. No entry for 'OID' " + OAIXML.OAI_EARLIEST_TIMESTAMP_OID + " in the db.");
266 } else {
267 timestamp = Long.parseLong(oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP)) * 1000; // stored in seconds, so x1000 to convert to milliseconds
268 //logger.info("@@@ found earliest OAI timestamp for collection " + this.coll_name + ": " + timestamp + " (ms)");
269 }
270 }
271 return timestamp;
272 }
273
274 protected Element findNamedMetadataFormat(Element list_meta_formats, String prefix) {
275 NodeList formats = list_meta_formats.getElementsByTagName(OAIXML.METADATA_FORMAT);
276 for (int i=0; i<formats.getLength(); i++) {
277 Element format = (Element)formats.item(i);
278 String meta_name = GSXML.getNodeText((Element)GSXML.getChildByTagName(format, OAIXML.METADATA_PREFIX));
279 if (prefix.equals(meta_name)) {
280 return format;
281 }
282 }
283 return null;
284 }
285
286 /** goes through the mappings from the collection one, and replaces existing ones in the main one */
287 protected void addCollectionMappings(Element main_meta_format, Element coll_meta_format) {
288
289 Element element_list = (Element)GSXML.getChildByTagName(main_meta_format, OAIXML.ELEMENT+GSXML.LIST_MODIFIER);
290 Document doc = element_list.getOwnerDocument();
291 NodeList coll_elements = coll_meta_format.getElementsByTagName(OAIXML.ELEMENT);
292 if (coll_elements.getLength()==0) {
293 // no mappings to include
294 return;
295 }
296 for (int i=0; i<coll_elements.getLength(); i++) {
297 Element e = (Element)coll_elements.item(i);
298 String elem_name = e.getAttribute(GSXML.NAME_ATT);
299 Element main_elem = GSXML.getNamedElement(element_list, OAIXML.ELEMENT, GSXML.NAME_ATT, elem_name);
300 if (main_elem == null) {
301 logger.error(elem_name+" not found in meta format, not using it");
302 } else {
303 element_list.replaceChild(doc.importNode(e, true),main_elem );
304 }
305 }
306 }
307
308 /** goes through all the mappings and makes a set of all collection
309 metadata names that could become an oai meta element - acts as
310 a reverse lookup for the mappings */
311 protected HashSet<String> getAllCollectionElements(Element meta_format) {
312 HashSet<String> meta_name_set = new HashSet<String>();
313 NodeList elements = meta_format.getElementsByTagName(OAIXML.ELEMENT);
314 for (int i=0; i<elements.getLength(); i++) {
315 Element e = (Element)elements.item(i);
316 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
317 if (map == null) {
318 // there is no mapping, just use the element name
319 meta_name_set.add(e.getAttribute(GSXML.NAME_ATT));
320 } else {
321 String list_of_names = map.getAttribute(OAIXML.ELEMENTS);
322 String[] name_array = list_of_names.split(",");
323 for (int j=0; j<name_array.length; j++) {
324 meta_name_set.add(name_array[j]);
325 }
326 }
327 }
328 return meta_name_set;
329 }
330
331 /** returns a specific service description */
332 public Element getServiceDescription(Document doc, String service_id, String lang, String subset) {
333
334 if (service_id.equals(OAIXML.LIST_RECORDS)) {
335 Element list_records = doc.createElement(GSXML.SERVICE_ELEM);
336 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
337 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
338 return list_records;
339 }
340
341 if (service_id.equals(OAIXML.LIST_IDENTIFIERS)) {
342 Element list_identifiers = doc.createElement(GSXML.SERVICE_ELEM);
343 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
344 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
345 return list_identifiers;
346 }
347 if (service_id.equals(OAIXML.LIST_SETS)) {
348 Element list_sets = doc.createElement(GSXML.SERVICE_ELEM);
349 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
350 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
351 return list_sets;
352 }
353 if (service_id.equals(OAIXML.LIST_METADATA_FORMATS)) {
354 Element list_metadata_formats = doc.createElement(GSXML.SERVICE_ELEM);
355 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
356 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
357 return list_metadata_formats;
358 }
359
360 if (service_id.equals(OAIXML.GET_RECORD)) {
361 Element get_record = doc.createElement(GSXML.SERVICE_ELEM);
362 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
363 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
364 return get_record;
365 }
366
367 return null;
368 }
369
370 /** The list sets service returns all the sets that this collection is/is part of/contains. This is gathered by Receptionist from all collections to answer the OAI ListSets request. */
371 protected Element processListSets(Element req) {
372 return list_sets_response;
373 }
374 /** returns the actual record element used in the OAI GetRecord response */
375 protected Element processGetRecord(Element req) {
376 /** arguments:
377 identifier: required
378 metadataPrefix: required
379 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
380 */
381 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
382 HashMap<String, String> param_map = GSXML.getParamMap(params);
383
384 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
385 if (prefix == null || prefix.equals("")) {
386 //Just a double-check
387 logger.error("the value of metadataPrefix att is not present in the request.");
388 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
389 }
390
391 // check that we support this format
392 if (!format_response_map.containsKey(prefix)) {
393 logger.error("metadata prefix is not supported for collection "+this.coll_name);
394 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
395 }
396
397 Document doc = XMLConverter.newDOM();
398
399 String oid = param_map.get(OAIXML.OID); // TODO should this be identifier???
400 boolean OID_is_deleted = false;
401 long millis = -1;
402
403 DBInfo oai_info = null;
404 if(oaiinf_db != null) {
405 oai_info = this.oaiinf_db.getInfo(oid);
406 if (oai_info == null) {
407 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
408 } else {
409
410 // indexdb doesn't have info on deleted docs, only oaiinf db does.
411 // So only oaiinfdb has timestamps for deleted docs
412 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
413 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
414 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds, like oailastmodified in the collection index db
415 millis = Long.parseLong(timestamp)*1000; // in milliseconds
416
417 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
418 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
419 OID_is_deleted = true;
420 }
421 }
422 }
423
424 //get a DBInfo object of the identifier; if this identifier is not present in the database,
425 // null is returned.
426 DBInfo info = this.coll_db.getInfo(oid);
427 if (info == null) {
428 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
429 logger.error("OID: " + oid + " is not present in the collection index database.");
430 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
431 } // if doc deleted, id missing in indexdb is not an error: doc id would exist only in oai-inf db, marked as deleted 'D'
432 }
433 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
434 millis = getDateStampMillis(info);
435 }
436 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
437
438
439 Element get_record_response = doc.createElement(GSXML.RESPONSE_ELEM);
440 Element get_record = doc.createElement(OAIXML.GET_RECORD);
441 get_record_response.appendChild(get_record);
442 Element record = doc.createElement(OAIXML.RECORD);
443 //compose the header element
444 record.appendChild(createHeaderElement(doc, oid, oailastmodified, OID_is_deleted));
445 if(!OID_is_deleted) {
446 //compose the metadata element
447 record.appendChild(createMetadataElement(doc, prefix, info));
448 }
449 get_record.appendChild(record);
450 return get_record_response;
451 }
452
453 /** return a list of records in specified set, containing metadata from specified prefix*/
454 protected Element processListRecords(Element req) {
455 return processListIdentifiersOrRecords(req, OAIXML.LIST_RECORDS, true);
456 }
457
458 /** return a list of identifiers in specified set that contain metadata belonging to specified prefix. */
459 protected Element processListIdentifiers(Element req) {
460 return processListIdentifiersOrRecords(req, OAIXML.LIST_IDENTIFIERS, false);
461 }
462
463 // Get a list of records/identifiers that match the parameters.
464 protected Element processListIdentifiersOrRecords(Element req, String response_name, boolean include_metadata) {
465 /** arguments:
466 metadataPrefix: required
467 * from: optional
468 * until: optional
469 * set: optional
470 * resumptionToken: exclusive and optional (ignored as it has been handled by OAIReceptionist)
471 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
472 */
473 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
474
475 if(params.getLength() == 0) {
476 logger.error("must at least have the metadataPrefix parameter, can't be none");
477 return OAIXML.createErrorResponse(OAIXML.BAD_ARGUMENT, "");
478 }
479
480 HashMap<String, String> param_map = GSXML.getParamMap(params);
481
482 String prefix = "";
483 Date from_date = null;
484 Date until_date = null;
485
486 if(param_map.containsKey(OAIXML.METADATA_PREFIX) == false) {
487 //Just a double-check
488 logger.error("A param element containing the metadataPrefix is not present.");
489 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
490 }
491 prefix = param_map.get(OAIXML.METADATA_PREFIX);
492 if (prefix == null || prefix.equals("")) {
493 //Just a double-check
494 logger.error("the value of metadataPrefix att is not present in the request.");
495 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
496 }
497
498 if(param_map.containsKey(OAIXML.FROM)) {
499 String from = param_map.get(OAIXML.FROM);
500 from_date = OAIXML.getDate(from);
501 }
502 if(param_map.containsKey(OAIXML.UNTIL)) {
503 String until = param_map.get(OAIXML.UNTIL);
504 until_date = OAIXML.getDate(until);
505 }
506
507 if (!format_response_map.containsKey(prefix)) {
508 logger.error(prefix + " metadata prefix is not supported for collection "+this.coll_name);
509 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
510 }
511
512 // get list of oids
513 ArrayList<String> oid_list = null;
514 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
515 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
516
517 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
518 logger.warn("@@@@@@@@@@@@@ NO OIDs in oai-inf db for " + this.cluster_name);
519 oid_list = getChildrenIds(OAIXML.BROWSELIST);
520 }
521 }
522
523 if (oid_list == null) {
524 logger.error("No matched records found in collection: oai-inf and index db's browselist are empty");
525 return OAIXML.createErrorResponse(OAIXML.NO_RECORDS_MATCH, "");
526 }
527 // all validation is done
528
529 // get the list of elements that are in this metadata prefix
530 HashSet<String> set_of_elems = format_elements_map.get(prefix);
531
532 Document doc = XMLConverter.newDOM();
533 Element list_items_response = doc.createElement(GSXML.RESPONSE_ELEM);
534 Element list_items = doc.createElement(response_name);
535 list_items_response.appendChild(list_items);
536
537 for(int i=0; i<oid_list.size(); i++) {
538 String oid = oid_list.get(i);
539
540 if(oid.equals(OAIXML.OAI_EARLIEST_TIMESTAMP_OID)) { // internal id not doc id, so skip
541 continue;
542 }
543
544 boolean OID_is_deleted = false;
545 long millis = -1;
546
547 DBInfo oai_info = null;
548 if(oaiinf_db != null) {
549 oai_info = this.oaiinf_db.getInfo(oid);
550 if (oai_info == null) {
551 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
552 } else {
553
554 // indexdb doesn't have info on deleted docs, only oaiinf db does.
555 // So only oaiinfdb has timestamps for deleted docs
556 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
557 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
558 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds like oailastmodified in the collection index db
559 millis = Long.parseLong(timestamp)*1000; // in milliseconds
560
561 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
562 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
563 OID_is_deleted = true;
564 }
565 }
566 }
567 DBInfo info = this.coll_db.getInfo(oid);
568 if (info == null) { // can happen if oid was deleted, in which case only oai_info keeps a record of the oid
569 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
570 logger.error("Collection database does not contain information about oid: " +oid);
571 }
572 }
573 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
574
575 millis = getDateStampMillis(info);
576 }
577
578 Date this_date = null;
579 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
580
581 if (millis == -1) {
582 if (from_date != null || until_date !=null) {
583 continue; // if this doc doesn't have a date for some reason, and
584 // we are doing a date range, then don't include it.
585 }
586 } else {
587 this_date = new Date(millis);
588 if (from_date != null) {
589 if(this_date.before(from_date)) {
590 continue;
591 }
592 }
593 if (until_date != null) {
594 if (this_date.after(until_date)) {
595 continue;
596 }
597 }
598 }
599
600 //compose the header element, which we'll be appending no matter what
601 Element header = createHeaderElement(doc, oid, oailastmodified, OID_is_deleted);
602
603 if (include_metadata) { // doing ListRecords
604 // compose a record for adding header and metadata
605 Element record = doc.createElement(OAIXML.RECORD);
606 list_items.appendChild(record);
607 //insert the header element
608 record.appendChild(header);
609 //Now check that this id has metadata for the required prefix.
610 if (info != null && documentContainsMetadata(info, set_of_elems)) {
611 // YES, it does have some metadata for this prefix
612 //compose the metadata element
613 record.appendChild(createMetadataElement(doc, prefix, info));
614 } // otherwise the oid was 'deleted' and only in the oai-inf db and not in the info (collection index) db
615 } else { // doing ListIdentifiers
616 //append the header element
617 list_items.appendChild(header);
618 }
619
620 }//end of for(int i=0; i<oid_list.size(); i++) of doing thru each record
621
622 return list_items_response;
623
624 }
625
626
627 // have implemented setDescription as an element, instead of a container containing metadata
628 private boolean configureSetInfo() {
629
630 Document doc = XMLConverter.newDOM();
631 this.list_sets_response = doc.createElement(GSXML.RESPONSE_ELEM);
632 Element list_sets_elem = doc.createElement(OAIXML.LIST_SETS);
633 this.list_sets_response.appendChild(list_sets_elem);
634 String set_name = this.coll_name;
635 String set_description = null;
636 Element name_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_NAME);
637 if (name_elem!=null) {
638 set_name = GSXML.getNodeText(name_elem);
639 if (set_name.equals("")) {
640 set_name = this.coll_name; // default to coll name if can't find one
641 }
642 }
643 Element description_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_DESCRIPTION);
644 if (description_elem!=null) {
645 set_description = GSXML.getNodeText(description_elem);
646 if (set_description.equals("")) {
647 set_description = null;
648 }
649 }
650 Element coll_set = OAIXML.createSet(doc, this.coll_name, set_name, set_description);
651 list_sets_elem.appendChild(coll_set);
652
653 // are we part of any super sets?
654 NodeList super_set_list = GSXML.getChildrenByTagName(this.config_info, OAIXML.OAI_SUPER_SET);
655 for (int i=0; i<super_set_list.getLength(); i++) {
656 String super_name = ((Element)super_set_list.item(i)).getAttribute(GSXML.NAME_ATT);
657 if (super_name != null && !super_name.equals("")) {
658 list_sets_elem.appendChild(OAIXML.createSet(doc, super_name, super_name, null));
659 }
660 }
661 return true;
662 }
663
664 /** create the metadata element used when processing ListRecords/GetRecord requests
665 */
666 protected Element createMetadataElement(Document doc, String prefix, DBInfo info) {
667 // the <metadata> element
668 Element metadata = doc.createElement(OAIXML.METADATA);
669 // the <oai:dc namespace...> element
670 Element prfx_str_elem = (Element)doc.importNode(this.format_meta_elem_map.get(prefix), true);
671 metadata.appendChild(prfx_str_elem);
672
673 Element meta_format_element = GSXML.getNamedElement(this.meta_formats_definition, OAIXML.METADATA_FORMAT, GSXML.NAME_ATT, prefix);
674 NodeList elements = meta_format_element.getElementsByTagName(OAIXML.ELEMENT);
675 // for each element in the definition
676 for (int i=0; i<elements.getLength(); i++) {
677 Element e = (Element)elements.item(i);
678 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
679 if (map == null) {
680 // look up the element name
681 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), info);
682 } else {
683 // we go though the list of names in the mapping
684 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), map.getAttribute(OAIXML.SELECT), map.getAttribute(OAIXML.ELEMENTS), info);
685 }
686 }
687 // output any metadata that is not just a simple mapping
688 addCustomMetadata(prfx_str_elem, prefix, info);
689 return metadata;
690 }
691
692 /** a simple addMetadata where we look for meta_name metadata, and add as that name*/
693 protected void addMetadata(Element meta_list_elem, String meta_name, DBInfo info) {
694 Vector<String> values = info.getMultiInfo(meta_name);
695 if (values != null && values.size()!=0) {
696 for (int i=0; i<values.size(); i++) {
697 addMetadataElement(meta_list_elem, meta_name, values.get(i));
698 }
699 }
700 }
701
702 /** more complicated addMetadata - can add multiple items. */
703 protected void addMetadata(Element meta_list_elem, String new_meta_name, String select_type, String name_list, DBInfo info) {
704 String[] names = name_list.split(",");
705 for (int i=0; i<names.length; i++) {
706 Vector<String> values;
707 // some special words
708 if (names[i].startsWith(OAIXML.GSF_LINK_PREFIX)) {
709 values = new Vector<String>();
710 String base_url = OAIXML.getBaseURL(); // e.g. e.g. http://host:port/greenstone3/oaiserver
711 String link_url = null;
712 if (names[i].equals(OAIXML.GSF_LINK_PREFIX+OAIXML.LINK_TYPE_DOCUMENT)) {
713 link_url = base_url.replace("oaiserver", "library") + "/collection/" + this.coll_name + "/document/" + info.getInfo("Identifier");
714 } else if (names[i].equals(OAIXML.GSF_LINK_PREFIX+OAIXML.LINK_TYPE_SOURCE)) {
715 String srcfile = info.getInfo("srclinkFile");
716 if (!srcfile.equals("")) {
717 link_url = base_url.replace("oaiserver", "") + "sites/"
718 + this.site_name
719 + "/collect/" + this.coll_name + "/index/assoc/"
720 + info.getInfo("assocfilepath") + "/" + srcfile;
721 }
722 }
723 if (link_url !=null) {
724 values.add(link_url);
725 }
726 } else {
727 values = info.getMultiInfo(names[i]);
728 }
729 if (values == null || values.size()==0) {
730 continue;
731 }
732 for (int j=0; j<values.size(); j++) {
733 addMetadataElement(meta_list_elem, new_meta_name, values.get(j));
734 if (select_type.equals(OAIXML.SELECT_SINGLE_VALUE)) {
735 return; // only want to add one value
736 }
737 }
738 if (select_type.equals(OAIXML.SELECT_FIRST_VALID_META)) {
739 return; // we have added all values of this meta elem
740 }
741 // otherwise, we will keep going through the list and add them all.
742 }
743 }
744
745 // specific metadata formats might need to do some custom metadata that is not
746 //just a standard mapping.
747 protected void addCustomMetadata(Element meta_list_elem, String prefix, DBInfo info) {
748
749
750 }
751
752 /** create the actual metadata element for the list */
753 protected void addMetadataElement(Element meta_list_elem, String name, String value) {
754
755 Element meta = GSXML.createTextElement(meta_list_elem.getOwnerDocument(), name, value);
756 meta_list_elem.appendChild(meta);
757 }
758
759
760 /** create a header element used when processing requests like ListRecords/GetRecord/ListIdentifiers
761 */
762 protected Element createHeaderElement(Document doc, String oid, String oailastmodified, boolean deleted) {
763
764 Element header = doc.createElement(OAIXML.HEADER);
765
766 // if deleted, get the date and change oailastmodified to timestamp in oaiinfo
767 if(deleted) {
768 header.setAttribute(OAIXML.OAI_INF_STATUS, OAIXML.HEADER_STATUS_ATTR_DELETED); // set the header status to deleted
769 // then the timestamp for deletion will be from oai-inf database
770 }
771
772 Element identifier = doc.createElement(OAIXML.IDENTIFIER);
773 GSXML.setNodeText(identifier, coll_name + ":" + oid);
774 header.appendChild(identifier);
775 Element set_spec = doc.createElement(OAIXML.SET_SPEC);
776 GSXML.setNodeText(set_spec, coll_name);
777 header.appendChild(set_spec);
778 Element datestamp = doc.createElement(OAIXML.DATESTAMP);
779 GSXML.setNodeText(datestamp, oailastmodified);
780 header.appendChild(datestamp);
781 return header;
782 }
783
784 /** return the metadata information */
785 protected Element processListMetadataFormats(Element req) {
786 // the request sent here must contain an OID. see doListMetadataFormats() in OAIReceptionist
787 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.OID);
788 if (param == null) {
789 logger.error("An element containing the OID attribute not is present.");
790 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
791 }
792 String oid = param.getAttribute(GSXML.VALUE_ATT);
793 if (oid == null || oid.equals("")) {
794 logger.error("No OID is present in the request.");
795 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
796 }
797
798 /*
799 ArrayList<String> oid_list = null;
800 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
801 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
802
803 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
804 oid_list = getChildrenIds(OAIXML.BROWSELIST);
805 }
806 }
807 */
808 // assume meta formats are only for OIDs that have not been deleted
809 // so don't need to check oai-inf db, and can just check collection's index db for list of OIDs
810 ArrayList<String> oid_list = getChildrenIds(OAIXML.BROWSELIST);
811 if (oid_list == null || oid_list.contains(oid) == false) {
812 logger.error("OID: " + oid + " is not present in the database.");
813 Element e= OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
814// logger.error((new XMLConverter()).getPrettyString (e));
815 return e;
816 }
817
818 DBInfo info = null;
819 info = this.coll_db.getInfo(oid);
820 if (info == null) { //just double check
821 return OAIXML.createErrorResponse(OAIXML.OAI_SERVICE_UNAVAILABLE, "");
822 }
823
824 Document doc = XMLConverter.newDOM();
825 Element list_metadata_formats_response = doc.createElement(GSXML.RESPONSE_ELEM);
826
827 Element list_metadata_formats = doc.createElement(OAIXML.LIST_METADATA_FORMATS);
828 list_metadata_formats_response.appendChild(list_metadata_formats);
829 boolean has_meta_format = false;
830
831 // for each format in format_elements_map
832 Iterator<String> it = format_elements_map.keySet().iterator();
833 while (it.hasNext()) {
834 String format = it.next();
835 HashSet<String> set_of_elems = format_elements_map.get(format);
836 if (documentContainsMetadata(info, set_of_elems)) {
837 // add this format into the response
838 has_meta_format = true;
839 list_metadata_formats.appendChild(doc.importNode(format_response_map.get(format), true));
840 }
841 }
842
843 if (has_meta_format == false) {
844 logger.error("Specified metadata names are not contained in the database.");
845 return OAIXML.createErrorResponse(OAIXML.NO_METADATA_FORMATS, "");
846 } else {
847 return list_metadata_formats_response;
848 }
849 }
850
851 protected boolean documentContainsMetadata(DBInfo info, HashSet<String> set_of_elems) {
852 if (set_of_elems.size() == 0) {
853 return false;
854 }
855 Iterator<String> i = set_of_elems.iterator();
856 while (i.hasNext()) {
857 if (!info.getInfo(i.next()).equals("")) {
858 return true;
859 }
860 }
861 return false;
862 }
863
864 /** returns a list of the child ids in order, null if no children */
865 protected ArrayList<String> getChildrenIds(String node_id) {
866 DBInfo info = this.coll_db.getInfo(node_id);
867 if (info == null) {
868 return null;
869 }
870
871 String contains = info.getInfo("contains");
872 if (contains.equals("")) {
873 return null;
874 }
875 ArrayList<String> children = new ArrayList<String>();
876 StringTokenizer st = new StringTokenizer(contains, ";");
877 while (st.hasMoreTokens()) {
878 String child_id = st.nextToken().replaceAll("\"", node_id);
879 children.add(child_id);
880 }
881 return children;
882 }
883 /**method to check whether any of the 'metadata_names' is contained in the 'info'.
884 * The name may be in the form: <name>,<mapped name>, in which the mapped name is
885 * optional. The mapped name is looked up in the DBInfo; if not present, use the first
886 * name which is mandatory.
887 */
888 protected boolean containsMetadata(DBInfo info, String[] metadata_names) {
889 if (metadata_names == null) return false;
890 logger.info("checking metadata names in db.");
891 for(int i=0; i<metadata_names.length; i++) {
892 int index = metadata_names[i].indexOf(",");
893 String meta_name = (index == -1) ? metadata_names[i] :
894 metadata_names[i].substring(index + 1);
895
896 if(info.getInfo(meta_name).equals("") == false) {
897 return true;
898 }
899 }
900 return false;
901 }
902
903 protected long getDateStampMillis(DBInfo info) {
904 // gs.OAIDateStamp is in YYYY-MM-DD
905 String time_stamp = info.getInfo(OAIXML.GS_OAI_DATE_STAMP);
906 long millis = -1;
907 if (!time_stamp.equals("")) {
908 millis = OAIXML.getTime(time_stamp);
909 }
910 if (millis == -1) {
911 // oailastmodified is in seconds
912 time_stamp = info.getInfo(OAIXML.OAI_LASTMODIFIED);
913 if (!time_stamp.equals("")) {
914 millis = Long.parseLong(time_stamp)*1000;
915 }
916 }
917 return millis;
918
919
920 }
921}
922
923
Note: See TracBrowser for help on using the repository browser.