source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/OAIPMH.java@ 31241

Last change on this file since 31241 was 31241, checked in by ak19, 7 years ago

Corrected misunderstanding on distinction between whether we're processing a ListRecords request or a ListIdentifiers request. The difference was denoted by the boolean include_metadata parameter.

File size: 34.4 KB
Line 
1/*
2 * OAIPMH.java
3 * Copyright (C) 2010 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.OAIXML;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.GSFile;
27import org.greenstone.gsdl3.util.XMLConverter;
28
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43import java.util.Date;
44import java.util.HashMap;
45import java.util.HashSet;
46import java.util.Map.Entry;
47
48import org.apache.log4j.Logger;
49
50/** Implements the oai metadata retrieval service for GS3 collections.
51 * Dig into each collection's database and retrieve the metadata
52 *
53 */
54
55public class OAIPMH extends ServiceRack {
56
57 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.OAIPMH.class.getName());
58
59 protected SimpleCollectionDatabase coll_db = null;
60 protected SimpleCollectionDatabase oaiinf_db = null;
61
62 protected String site_name = "";
63 protected String coll_name = "";
64
65 // set this up during configure
66 protected Element list_sets_response = null;
67
68 protected Element meta_formats_definition = null;
69 protected HashMap<String, HashSet<String>> format_elements_map = null;
70 protected HashMap<String, Element> format_response_map = null;
/**
 * Creates an unconfigured OAIPMH service rack. Nothing is set up here;
 * the database handles and metadata format tables declared on this class
 * start out as null and are filled in by configure() and configureOAI().
 */
public OAIPMH() {
    super();
}
75
76 public void cleanUp() {
77 super.cleanUp();//??
78 this.coll_db.closeDatabase();
79 this.oaiinf_db.closeDatabase();
80 }
81 /** configure this service
82 info is the OAIPMH service rack from collectionConfig.xml, and
83 extra_info is buildConfig.xml */
84 public boolean configure(Element info, Element extra_info) {
85 if (!super.configure(info, extra_info)){
86 logger.info("Configuring ServiceRack.java returns false.");
87 return false;
88 }
89
90 //get the names from ServiceRack.java
91 this.site_name = this.router.getSiteName();
92 this.coll_name = this.cluster_name;
93
94 logger.info("Configuring OAIPMH...");
95
96 this.config_info = info;
97
98 // the index stem is either specified in the buildConfig.xml file (extra_info) or uses the collection name
99 Element metadata_list = (Element) GSXML.getChildByTagName(extra_info, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
100 String index_stem = "";
101 String infodb_type = "";
102 if (metadata_list != null) {
103
104 Element index_stem_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "indexStem");
105
106 if (index_stem_elem != null) {
107 index_stem = GSXML.getNodeText(index_stem_elem);
108 }
109
110 Element infodb_type_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "infodbType");
111 if (infodb_type_elem != null) {
112 infodb_type = GSXML.getNodeText(infodb_type_elem);
113 }
114
115 }
116
117 if (index_stem == null || index_stem.equals("")) {
118 index_stem = this.cluster_name; // index_stem is the name of the db in indext/text, it is <colname>.<db>
119 }
120 if (infodb_type == null || infodb_type.equals("")) {
121 infodb_type = "gdbm"; // the default
122 }
123
124 coll_db = new SimpleCollectionDatabase(infodb_type);
125 if (!coll_db.databaseOK()) {
126 logger.error("Couldn't create the collection database of type "+infodb_type);
127 return false;
128 }
129
130 oaiinf_db = new SimpleCollectionDatabase(infodb_type);
131 if (!oaiinf_db.databaseOK()) {
132 logger.error("Couldn't create the oai-inf database of type "+infodb_type);
133 return false;
134 }
135
136
137 // Open databases for querying
138 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, index_stem, infodb_type);
139 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
140 logger.error("Could not open collection database!");
141 return false;
142 }
143 // the oaiinf_db is called oai-inf.<infodb_type_extension>
144 String oaiinf_db_file = GSFile.OAIInfoDatabaseFile(this.site_home, this.cluster_name, "oai-inf", infodb_type);
145 File oaiinfFile = new File(oaiinf_db_file);
146
147 if(!oaiinfFile.exists()) {
148 logger.warn("oai-inf database for collection + " + this.cluster_name + " does not exist.");
149 oaiinf_db = null;
150 } else if (!this.oaiinf_db.openDatabase(oaiinf_db_file, SimpleCollectionDatabase.READ)) {
151 logger.warn("Could not open oai-inf database for collection + " + this.cluster_name + "!");
152 oaiinf_db = null;
153 }
154
155 // work out what sets this collection has. Will usually contain the collection itself, optional super collection, and maybe subcolls if appropriate classifiers are present.
156 configureSetInfo();
157 // the short_service_info is used by the message router to find the method names,
158
159 Element list_records = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
160 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
161 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
162 this.short_service_info.appendChild(list_records);
163
164 Element list_identifiers = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
165 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
166 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
167 this.short_service_info.appendChild(list_identifiers);
168
169 Element list_sets = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
170 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
171 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
172 this.short_service_info.appendChild(list_sets);
173
174 Element list_metadata_formats = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
175 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
176 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
177 this.short_service_info.appendChild(list_metadata_formats);
178
179 Element get_record = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
180 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
181 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
182 this.short_service_info.appendChild(get_record);
183
184 return true;
185 }
186
187 public boolean configureOAI(Element oai_config_elem) {
188 this.meta_formats_definition = this.desc_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
189 this.format_response_map = new HashMap<String, Element>();
190 this.format_elements_map = new HashMap<String, HashSet<String>>();
191
192 // for now, all we want is the metadata prefix description and the mapping list
193 Element main_lmf_elem = (Element) GSXML.getChildByTagName(oai_config_elem, OAIXML.LIST_METADATA_FORMATS);
194 if (main_lmf_elem == null) {
195 logger.error("No listMetadataFormats element found in OAIConfig.xml");
196 return false;
197 }
198 NodeList meta_formats_list = this.config_info.getElementsByTagName(OAIXML.METADATA_FORMAT);
199 if (meta_formats_list.getLength() == 0) {
200 logger.error("no metadataFormat elements found in OAIPMH serviceRack element");
201 return false;
202 }
203 boolean found_meta_format = false;
204 for(int i=0; i<meta_formats_list.getLength(); i++) {
205 Element mf = (Element) meta_formats_list.item(i);
206 String prefix = mf.getAttribute(OAIXML.METADATA_PREFIX);
207 if (prefix.equals("")) {
208 logger.error("metadataFormat element had no metadataPrefix attribute");
209 continue;
210 }
211 // get the right format from OAICOnfig
212 Element meta_format = findNamedMetadataFormat(main_lmf_elem, prefix);
213 if (meta_format == null) {
214 logger.error("Couldn't find metadataFormat named "+prefix+" in OAIConfig.xml");
215 continue;
216 }
217 // copy the format definition into our stored Element
218 Element collection_version_format = (Element) this.desc_doc.importNode(meta_format, true);
219 collection_version_format.setAttribute(GSXML.NAME_ATT, prefix); // for convenience
220 this.meta_formats_definition.appendChild(collection_version_format);
221 // set up the response element for this format
222 format_response_map.put(prefix, OAIXML.getMetadataFormatShort(this.desc_doc, collection_version_format));
223 // add in collection specific mappings
224 addCollectionMappings(collection_version_format, mf);
225 // now set up a list of all collection elements for reverse lookup of the mapping
226 format_elements_map.put(prefix, getAllCollectionElements(collection_version_format));
227
228 }
229 return true;
230 }
231
232 protected Element findNamedMetadataFormat(Element list_meta_formats, String prefix) {
233 NodeList formats = list_meta_formats.getElementsByTagName(OAIXML.METADATA_FORMAT);
234 for (int i=0; i<formats.getLength(); i++) {
235 Element format = (Element)formats.item(i);
236 String meta_name = GSXML.getNodeText((Element)GSXML.getChildByTagName(format, OAIXML.METADATA_PREFIX));
237 if (prefix.equals(meta_name)) {
238 return format;
239 }
240 }
241 return null;
242 }
243
244 /** goes through the mappings from the collection one, and replaces existing ones in the main one */
245 protected void addCollectionMappings(Element main_meta_format, Element coll_meta_format) {
246
247 Element element_list = (Element)GSXML.getChildByTagName(main_meta_format, OAIXML.ELEMENT+GSXML.LIST_MODIFIER);
248 Document doc = element_list.getOwnerDocument();
249 NodeList coll_elements = coll_meta_format.getElementsByTagName(OAIXML.ELEMENT);
250 if (coll_elements.getLength()==0) {
251 // no mappings to include
252 return;
253 }
254 for (int i=0; i<coll_elements.getLength(); i++) {
255 Element e = (Element)coll_elements.item(i);
256 String elem_name = e.getAttribute(GSXML.NAME_ATT);
257 Element main_elem = GSXML.getNamedElement(element_list, OAIXML.ELEMENT, GSXML.NAME_ATT, elem_name);
258 if (main_elem == null) {
259 logger.error(elem_name+" not found in meta format, not using it");
260 } else {
261 element_list.replaceChild(doc.importNode(e, true),main_elem );
262 }
263 }
264 }
265
266 /** goes through all the mappings and makes a set of all collection
267 metadata names that could become an oai meta element - acts as
268 a reverse lookup for the mappings */
269 protected HashSet<String> getAllCollectionElements(Element meta_format) {
270 HashSet<String> meta_name_set = new HashSet<String>();
271 NodeList elements = meta_format.getElementsByTagName(OAIXML.ELEMENT);
272 for (int i=0; i<elements.getLength(); i++) {
273 Element e = (Element)elements.item(i);
274 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
275 if (map == null) {
276 // there is no mapping, just use the element name
277 meta_name_set.add(e.getAttribute(GSXML.NAME_ATT));
278 } else {
279 String list_of_names = map.getAttribute(OAIXML.ELEMENTS);
280 String[] name_array = list_of_names.split(",");
281 for (int j=0; j<name_array.length; j++) {
282 meta_name_set.add(name_array[j]);
283 }
284 }
285 }
286 return meta_name_set;
287 }
288
289 /** returns a specific service description */
290 public Element getServiceDescription(Document doc, String service_id, String lang, String subset) {
291
292 if (service_id.equals(OAIXML.LIST_RECORDS)) {
293 Element list_records = doc.createElement(GSXML.SERVICE_ELEM);
294 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
295 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
296 return list_records;
297 }
298
299 if (service_id.equals(OAIXML.LIST_IDENTIFIERS)) {
300 Element list_identifiers = doc.createElement(GSXML.SERVICE_ELEM);
301 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
302 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
303 return list_identifiers;
304 }
305 if (service_id.equals(OAIXML.LIST_SETS)) {
306 Element list_sets = doc.createElement(GSXML.SERVICE_ELEM);
307 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
308 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
309 return list_sets;
310 }
311 if (service_id.equals(OAIXML.LIST_METADATA_FORMATS)) {
312 Element list_metadata_formats = doc.createElement(GSXML.SERVICE_ELEM);
313 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
314 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
315 return list_metadata_formats;
316 }
317
318 if (service_id.equals(OAIXML.GET_RECORD)) {
319 Element get_record = doc.createElement(GSXML.SERVICE_ELEM);
320 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
321 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
322 return get_record;
323 }
324
325 return null;
326 }
327
328 /** The list sets service returns all the sets that this collection is/is part of/contains. This is gathered by Receptionist from all collections to answer the OAI ListSets request. */
329 protected Element processListSets(Element req) {
330 return list_sets_response;
331 }
332 /** returns the actual record element used in the OAI GetRecord response */
333 protected Element processGetRecord(Element req) {
334 /** arguments:
335 identifier: required
336 metadataPrefix: required
337 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
338 */
339 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
340 HashMap<String, String> param_map = GSXML.getParamMap(params);
341
342 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
343 if (prefix == null || prefix.equals("")) {
344 //Just a double-check
345 logger.error("the value of metadataPrefix att is not present in the request.");
346 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
347 }
348
349 // check that we support this format
350 if (!format_response_map.containsKey(prefix)) {
351 logger.error("metadata prefix is not supported for collection "+this.coll_name);
352 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
353 }
354
355 Document doc = XMLConverter.newDOM();
356
357 String oid = param_map.get(OAIXML.OID); // TODO should this be identifier???
358 boolean OID_is_deleted = false;
359 long millis = -1;
360
361 DBInfo oai_info = null;
362 if(oaiinf_db != null) {
363 oai_info = this.oaiinf_db.getInfo(oid);
364 if (oai_info == null) {
365 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
366 } else {
367 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
368 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
369 OID_is_deleted = true;
370
371 // get the right timestamp for deletion: from oaiinf db
372 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // in seconds presumably, like oailastmodified in the collection index db
373
374 millis = Long.parseLong(timestamp)*1000; // in milliseconds
375 }
376 }
377 }
378
379 //get a DBInfo object of the identifier; if this identifier is not present in the database,
380 // null is returned.
381 DBInfo info = this.coll_db.getInfo(oid);
382 if (info == null) {
383 logger.error("OID: " + oid + " is not present in the collection database.");
384 //return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, ""); // may exist as deleted in oai-inf db
385 }
386 else if (millis == -1) { // so !OID_is_deleted, get oailastmodified from collection's index db
387 ArrayList<String> keys = new ArrayList<String>(info.getKeys());
388 millis = getDateStampMillis(info);
389 }
390 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
391
392
393 Element get_record_response = doc.createElement(GSXML.RESPONSE_ELEM);
394 Element get_record = doc.createElement(OAIXML.GET_RECORD);
395 get_record_response.appendChild(get_record);
396 Element record = doc.createElement(OAIXML.RECORD);
397 //compose the header element
398 record.appendChild(createHeaderElement(doc, oid, oailastmodified, OID_is_deleted));
399 if(!OID_is_deleted) {
400 //compose the metadata element
401 record.appendChild(createMetadataElement(doc, prefix, info));
402 }
403 get_record.appendChild(record);
404 return get_record_response;
405 }
406
407 /** return a list of records in specified set, containing metadata from specified prefix*/
408 protected Element processListRecords(Element req) {
409 return processListIdentifiersOrRecords(req, OAIXML.LIST_RECORDS, true);
410 }
411
412 /** return a list of identifiers in specified set that contain metadata belonging to specified prefix. */
413 protected Element processListIdentifiers(Element req) {
414 return processListIdentifiersOrRecords(req, OAIXML.LIST_IDENTIFIERS, false);
415 }
416
417 // Get a list of records/identifiers that match the parameters.
418 protected Element processListIdentifiersOrRecords(Element req, String response_name, boolean include_metadata) {
419 /** arguments:
420 metadataPrefix: required
421 * from: optional
422 * until: optional
423 * set: optional
424 * resumptionToken: exclusive and optional (ignored as it has been handled by OAIReceptionist)
425 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
426 */
427 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
428
429 if(params.getLength() == 0) {
430 logger.error("must at least have the metadataPrefix parameter, can't be none");
431 return OAIXML.createErrorResponse(OAIXML.BAD_ARGUMENT, "");
432 }
433
434 HashMap<String, String> param_map = GSXML.getParamMap(params);
435
436 String prefix = "";
437 Date from_date = null;
438 Date until_date = null;
439
440 if(param_map.containsKey(OAIXML.METADATA_PREFIX) == false) {
441 //Just a double-check
442 logger.error("A param element containing the metadataPrefix is not present.");
443 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
444 }
445 prefix = param_map.get(OAIXML.METADATA_PREFIX);
446 if (prefix == null || prefix.equals("")) {
447 //Just a double-check
448 logger.error("the value of metadataPrefix att is not present in the request.");
449 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
450 }
451
452 if(param_map.containsKey(OAIXML.FROM)) {
453 String from = param_map.get(OAIXML.FROM);
454 from_date = OAIXML.getDate(from);
455 }
456 if(param_map.containsKey(OAIXML.UNTIL)) {
457 String until = param_map.get(OAIXML.UNTIL);
458 until_date = OAIXML.getDate(until);
459 }
460
461 if (!format_response_map.containsKey(prefix)) {
462 logger.error(prefix + " metadata prefix is not supported for collection "+this.coll_name);
463 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
464 }
465
466 // get list of oids
467 ArrayList<String> oid_list = null;
468 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
469 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
470
471 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
472 logger.warn("@@@@@@@@@@@@@ NO OIDs in oai-inf db for " + this.cluster_name);
473 oid_list = getChildrenIds(OAIXML.BROWSELIST);
474 }
475 }
476
477 if (oid_list == null) {
478 logger.error("No matched records found in collection: oai-inf and index db's browselist are empty");
479 return OAIXML.createErrorResponse(OAIXML.NO_RECORDS_MATCH, "");
480 }
481 // all validation is done
482
483 // get the list of elements that are in this metadata prefix
484 HashSet<String> set_of_elems = format_elements_map.get(prefix);
485
486 Document doc = XMLConverter.newDOM();
487 Element list_items_response = doc.createElement(GSXML.RESPONSE_ELEM);
488 Element list_items = doc.createElement(response_name);
489 list_items_response.appendChild(list_items);
490
491 for(int i=0; i<oid_list.size(); i++) {
492 String oid = oid_list.get(i);
493 boolean OID_is_deleted = false;
494 long millis = -1;
495
496 DBInfo oai_info = null;
497 if(oaiinf_db != null) {
498 oai_info = this.oaiinf_db.getInfo(oid);
499 if (oai_info == null) {
500 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
501 } else {
502 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
503 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
504 OID_is_deleted = true;
505
506 // get the right timestamp for deletion: from oaiinf db
507 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // in seconds presumably, like oailastmodified in the collection index db
508
509 millis = Long.parseLong(timestamp)*1000; // in milliseconds
510 }
511 }
512 }
513 DBInfo info = this.coll_db.getInfo(oid);
514 if (info == null) { // can happen if oid was deleted, in which case only oai_info keeps a record of the oid
515 logger.error("Collection database does not contain information about oid: " +oid);
516 }
517 else if (millis == -1) { // so !OID_is_deleted, get oailastmodified from collection's index db
518
519 millis = getDateStampMillis(info);
520 }
521
522 Date this_date = null;
523 if (millis == -1) {
524 if (from_date != null || until_date !=null) {
525 continue; // if this doc doesn't have a date for some reason, and
526 // we are doing a date range, then don't include it.
527 }
528 } else {
529 this_date = new Date(millis);
530 if (from_date != null) {
531 if(this_date.before(from_date)) {
532 continue;
533 }
534 }
535 if (until_date != null) {
536 if (this_date.after(until_date)) {
537 continue;
538 }
539 }
540 }
541
542 //compose the header element, which we'll be appending no matter what
543 Element header = createHeaderElement(doc, oid, OAIXML.getTime(millis), OID_is_deleted);
544
545 if (include_metadata) { // doing ListRecords
546 // compose a record for adding header and metadata
547 Element record = doc.createElement(OAIXML.RECORD);
548 list_items.appendChild(record);
549 //insert the header element
550 record.appendChild(header);
551 //Now check that this id has metadata for the required prefix.
552 if (info != null && documentContainsMetadata(info, set_of_elems)) {
553 // YES, it does have some metadata for this prefix
554 //compose the metadata element
555 record.appendChild(createMetadataElement(doc, prefix, info));
556 } // otherwise the oid was 'deleted' and only in the oai-inf db and not in the info (collection index) db
557 } else { // doing ListIdentifiers
558 //append the header element
559 list_items.appendChild(header);
560 }
561
562 }//end of for(int i=0; i<oid_list.size(); i++) of doing thru each record
563
564 return list_items_response;
565
566 }
567
568
569 // have implemented setDescription as an element, instead of a container containing metadata
570 private boolean configureSetInfo() {
571
572 Document doc = XMLConverter.newDOM();
573 this.list_sets_response = doc.createElement(GSXML.RESPONSE_ELEM);
574 Element list_sets_elem = doc.createElement(OAIXML.LIST_SETS);
575 this.list_sets_response.appendChild(list_sets_elem);
576 String set_name = this.coll_name;
577 String set_description = null;
578 Element name_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_NAME);
579 if (name_elem!=null) {
580 set_name = GSXML.getNodeText(name_elem);
581 if (set_name.equals("")) {
582 set_name = this.coll_name; // default to coll name if can't find one
583 }
584 }
585 Element description_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_DESCRIPTION);
586 if (description_elem!=null) {
587 set_description = GSXML.getNodeText(description_elem);
588 if (set_description.equals("")) {
589 set_description = null;
590 }
591 }
592 Element coll_set = OAIXML.createSet(doc, this.coll_name, set_name, set_description);
593 list_sets_elem.appendChild(coll_set);
594
595 // are we part of any super sets?
596 NodeList super_set_list = GSXML.getChildrenByTagName(this.config_info, OAIXML.OAI_SUPER_SET);
597 for (int i=0; i<super_set_list.getLength(); i++) {
598 String super_name = ((Element)super_set_list.item(i)).getAttribute(GSXML.NAME_ATT);
599 if (super_name != null && !super_name.equals("")) {
600 list_sets_elem.appendChild(OAIXML.createSet(doc, super_name, super_name, null));
601 }
602 }
603 return true;
604 }
605
606 /** create the metadata element used when processing ListRecords/GetRecord requests
607 */
608 protected Element createMetadataElement(Document doc, String prefix, DBInfo info) {
609 // the <metadata> element
610 Element metadata = doc.createElement(OAIXML.METADATA);
611 // the <oai:dc namespace...> element
612 Element prfx_str_elem = OAIXML.getMetadataPrefixElement(doc, prefix, OAIXML.oai_version);
613 metadata.appendChild(prfx_str_elem);
614
615 Element meta_format_element = GSXML.getNamedElement(this.meta_formats_definition, OAIXML.METADATA_FORMAT, GSXML.NAME_ATT, prefix);
616 NodeList elements = meta_format_element.getElementsByTagName(OAIXML.ELEMENT);
617 // for each element in the definition
618 for (int i=0; i<elements.getLength(); i++) {
619 Element e = (Element)elements.item(i);
620 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
621 if (map == null) {
622 // look up the element name
623 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), info);
624 } else {
625 // we go though the list of names in the mapping
626 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), map.getAttribute(OAIXML.SELECT), map.getAttribute(OAIXML.ELEMENTS), info);
627 }
628 }
629 // output any metadata that is not just a simple mapping
630 addCustomMetadata(prfx_str_elem, prefix, info);
631 return metadata;
632 }
633
634 /** a simple addMetadata where we look for meta_name metadata, and add as that name*/
635 protected void addMetadata(Element meta_list_elem, String meta_name, DBInfo info) {
636 Vector<String> values = info.getMultiInfo(meta_name);
637 if (values != null && values.size()!=0) {
638 for (int i=0; i<values.size(); i++) {
639 addMetadataElement(meta_list_elem, meta_name, values.get(i));
640 }
641 }
642 }
643
644 /** more complicated addMetadata - can add multiple items. */
645 protected void addMetadata(Element meta_list_elem, String new_meta_name, String select_type, String name_list, DBInfo info) {
646 String[] names = name_list.split(",");
647 for (int i=0; i<names.length; i++) {
648 Vector<String> values = info.getMultiInfo(names[i]);
649 if (values == null || values.size()==0) {
650 continue;
651 }
652 for (int j=0; j<values.size(); j++) {
653 addMetadataElement(meta_list_elem, new_meta_name, values.get(j));
654 if (select_type.equals(OAIXML.SELECT_SINGLE_VALUE)) {
655 return; // only want to add one value
656 }
657 }
658 if (select_type.equals(OAIXML.SELECT_FIRST_VALID_META)) {
659 return; // we have added all values of this meta elem
660 }
661 // otherwise, we will keep going through the list and add them all.
662 }
663 }
664
665 // specific metadata formats might need to do some custom metadata that is not
666 //just a standard mapping. eg oai_dc outputting an identifier that is a link
667 protected void addCustomMetadata(Element meta_list_elem, String prefix, DBInfo info) {
668
669 if (prefix.equals(OAIXML.META_FORMAT_DC)) {
670 // we want to add in another dc:identifier element with a link to the resource if possible
671 // try gs.OAIResourceURL first, then srclinkFile, then GS version of documnet
672 String gsURL = info.getInfo(OAIXML.GS_OAI_RESOURCE_URL);
673 if (gsURL.equals("")) {
674 String base_url = OAIXML.getBaseURL(); // e.g. e.g. http://host:port/greenstone3/oaiserver
675 // try srclinkFile
676 gsURL = info.getInfo("srclinkFile");
677 if (!gsURL.equals("")) {
678 // make up the link to the file
679 gsURL = base_url.replace("oaiserver", "") + "sites/" + this.site_name
680 + "/collect/" + this.coll_name + "/index/assoc/"
681 + info.getInfo("assocfilepath") + "/" + gsURL;
682 } else {
683 // no srclink file, lets provide a link to the greenstone doc
684 gsURL = base_url.replace("oaiserver", "library") + "/collection/" + this.coll_name + "/document/" + info.getInfo("Identifier");
685 }
686 }
687 // now we have the url link, add as metadata
688 addMetadataElement(meta_list_elem, "dc:identifier", gsURL);
689 }
690 }
691
692 /** create the actual metadata element for the list */
693 protected void addMetadataElement(Element meta_list_elem, String name, String value) {
694
695 Element meta = GSXML.createTextElement(meta_list_elem.getOwnerDocument(), name, value);
696 meta_list_elem.appendChild(meta);
697 }
698
699
700 /** create a header element used when processing requests like ListRecords/GetRecord/ListIdentifiers
701 */
702 protected Element createHeaderElement(Document doc, String oid, String oailastmodified, boolean deleted) {
703
704 Element header = doc.createElement(OAIXML.HEADER);
705
706 // if deleted, get the date and change oailastmodified to timestamp in oaiinfo
707 if(deleted) {
708 header.setAttribute(OAIXML.OAI_INF_STATUS, OAIXML.HEADER_STATUS_ATTR_DELETED); // set the header status to deleted
709 // then the timestamp for deletion will be from oai-inf database
710 }
711
712 Element identifier = doc.createElement(OAIXML.IDENTIFIER);
713 GSXML.setNodeText(identifier, coll_name + ":" + oid);
714 header.appendChild(identifier);
715 Element set_spec = doc.createElement(OAIXML.SET_SPEC);
716 GSXML.setNodeText(set_spec, coll_name);
717 header.appendChild(set_spec);
718 Element datestamp = doc.createElement(OAIXML.DATESTAMP);
719 GSXML.setNodeText(datestamp, oailastmodified);
720 header.appendChild(datestamp);
721 return header;
722 }
723
724 /** return the metadata information */
725 protected Element processListMetadataFormats(Element req) {
726 // the request sent here must contain an OID. see doListMetadataFormats() in OAIReceptionist
727 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.OID);
728 if (param == null) {
729 logger.error("An element containing the OID attribute not is present.");
730 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
731 }
732 String oid = param.getAttribute(GSXML.VALUE_ATT);
733 if (oid == null || oid.equals("")) {
734 logger.error("No OID is present in the request.");
735 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
736 }
737
738 /*
739 ArrayList<String> oid_list = null;
740 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
741 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
742
743 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
744 oid_list = getChildrenIds(OAIXML.BROWSELIST);
745 }
746 }
747 */
748 // assume meta formats are only for OIDs that have not been deleted
749 // so don't need to check oai-inf db, and can just check collection's index db for list of OIDs
750 ArrayList<String> oid_list = getChildrenIds(OAIXML.BROWSELIST);
751 if (oid_list == null || oid_list.contains(oid) == false) {
752 logger.error("OID: " + oid + " is not present in the database.");
753 Element e= OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
754// logger.error((new XMLConverter()).getPrettyString (e));
755 return e;
756 }
757
758 DBInfo info = null;
759 info = this.coll_db.getInfo(oid);
760 if (info == null) { //just double check
761 return OAIXML.createErrorResponse(OAIXML.OAI_SERVICE_UNAVAILABLE, "");
762 }
763
764 Document doc = XMLConverter.newDOM();
765 Element list_metadata_formats_response = doc.createElement(GSXML.RESPONSE_ELEM);
766
767 Element list_metadata_formats = doc.createElement(OAIXML.LIST_METADATA_FORMATS);
768 list_metadata_formats_response.appendChild(list_metadata_formats);
769 boolean has_meta_format = false;
770
771 // for each format in format_elements_map
772 Iterator<String> it = format_elements_map.keySet().iterator();
773 while (it.hasNext()) {
774 String format = it.next();
775 HashSet<String> set_of_elems = format_elements_map.get(format);
776 if (documentContainsMetadata(info, set_of_elems)) {
777 // add this format into the response
778 has_meta_format = true;
779 list_metadata_formats.appendChild(doc.importNode(format_response_map.get(format), true));
780 }
781 }
782
783 if (has_meta_format == false) {
784 logger.error("Specified metadata names are not contained in the database.");
785 return OAIXML.createErrorResponse(OAIXML.NO_METADATA_FORMATS, "");
786 } else {
787 return list_metadata_formats_response;
788 }
789 }
790
791 protected boolean documentContainsMetadata(DBInfo info, HashSet<String> set_of_elems) {
792 if (set_of_elems.size() == 0) {
793 return false;
794 }
795 Iterator<String> i = set_of_elems.iterator();
796 while (i.hasNext()) {
797 if (!info.getInfo(i.next()).equals("")) {
798 return true;
799 }
800 }
801 return false;
802 }
803
804 /** returns a list of the child ids in order, null if no children */
805 protected ArrayList<String> getChildrenIds(String node_id) {
806 DBInfo info = this.coll_db.getInfo(node_id);
807 if (info == null) {
808 return null;
809 }
810
811 String contains = info.getInfo("contains");
812 if (contains.equals("")) {
813 return null;
814 }
815 ArrayList<String> children = new ArrayList<String>();
816 StringTokenizer st = new StringTokenizer(contains, ";");
817 while (st.hasMoreTokens()) {
818 String child_id = st.nextToken().replaceAll("\"", node_id);
819 children.add(child_id);
820 }
821 return children;
822 }
823 /**method to check whether any of the 'metadata_names' is contained in the 'info'.
824 * The name may be in the form: <name>,<mapped name>, in which the mapped name is
825 * optional. The mapped name is looked up in the DBInfo; if not present, use the first
826 * name which is mandatory.
827 */
828 protected boolean containsMetadata(DBInfo info, String[] metadata_names) {
829 if (metadata_names == null) return false;
830 logger.info("checking metadata names in db.");
831 for(int i=0; i<metadata_names.length; i++) {
832 int index = metadata_names[i].indexOf(",");
833 String meta_name = (index == -1) ? metadata_names[i] :
834 metadata_names[i].substring(index + 1);
835
836 if(info.getInfo(meta_name).equals("") == false) {
837 return true;
838 }
839 }
840 return false;
841 }
842
843 protected long getDateStampMillis(DBInfo info) {
844 // gs.OAIDateStamp is in YYYY-MM-DD
845 String time_stamp = info.getInfo(OAIXML.GS_OAI_DATE_STAMP);
846 long millis = -1;
847 if (!time_stamp.equals("")) {
848 millis = OAIXML.getTime(time_stamp);
849 }
850 if (millis == -1) {
851 // oailastmodified is in seconds
852 time_stamp = info.getInfo(OAIXML.OAI_LASTMODIFIED);
853 if (!time_stamp.equals("")) {
854 millis = Long.parseLong(time_stamp)*1000;
855 }
856 }
857 return millis;
858
859
860 }
861}
862
863
Note: See TracBrowser for help on using the repository browser.