source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/OAIPMH.java@ 32213

Last change on this file since 32213 was 32213, checked in by kjdon, 6 years ago

OAI: don't have hardcoded custom outputting for dc.identifier anymore. we do it via mapping in siteConfig, using special keywords gsflink.source, gsflink.document. User can choose to have links to gs.OAIResourceURL, srclink or link, or all or any combination of these. Prompted by diego wanting the srclink and link urls as dc.identifier

File size: 36.4 KB
Line 
1/*
2 * OAIPMH.java
3 * Copyright (C) 2010 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.OAIXML;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.GSFile;
27import org.greenstone.gsdl3.util.XMLConverter;
28
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43import java.util.Date;
44import java.util.HashMap;
45import java.util.HashSet;
46import java.util.Map.Entry;
47
48import org.apache.log4j.Logger;
49
50/** Implements the oai metadata retrieval service for GS3 collections.
51 * Dig into each collection's database and retrieve the metadata
52 *
53 */
54
55public class OAIPMH extends ServiceRack {
56
57 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.OAIPMH.class.getName());
58
59 protected SimpleCollectionDatabase coll_db = null;
60 protected SimpleCollectionDatabase oaiinf_db = null;
61
62 protected String site_name = "";
63 protected String coll_name = "";
64
65 // set this up during configure
66 protected Element list_sets_response = null;
67
68 protected Element meta_formats_definition = null;
69 protected HashMap<String, HashSet<String>> format_elements_map = null;
70 protected HashMap<String, Element> format_response_map = null;
71 protected HashMap<String, Element> format_meta_elem_map = null;
72
73 /** constructor */
74 public OAIPMH() {
75
76 }
77
78 public void cleanUp() {
79 super.cleanUp();//??
80 this.coll_db.closeDatabase();
81 if (this.oaiinf_db != null){
82 this.oaiinf_db.closeDatabase();
83 }
84
85 }
86 /** configure this service
87 info is the OAIPMH service rack from collectionConfig.xml, and
88 extra_info is buildConfig.xml */
89 public boolean configure(Element info, Element extra_info) {
90 if (!super.configure(info, extra_info)){
91 logger.info("Configuring ServiceRack.java returns false.");
92 return false;
93 }
94
95 //get the names from ServiceRack.java
96 this.site_name = this.router.getSiteName();
97 this.coll_name = this.cluster_name;
98
99 logger.info("Configuring OAIPMH...");
100
101 this.config_info = info;
102
103 // the index stem is either specified in the buildConfig.xml file (extra_info) or uses the collection name
104 Element metadata_list = (Element) GSXML.getChildByTagName(extra_info, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
105 String index_stem = "";
106 String infodb_type = "";
107 if (metadata_list != null) {
108
109 Element index_stem_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "indexStem");
110
111 if (index_stem_elem != null) {
112 index_stem = GSXML.getNodeText(index_stem_elem);
113 }
114
115 Element infodb_type_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "infodbType");
116 if (infodb_type_elem != null) {
117 infodb_type = GSXML.getNodeText(infodb_type_elem);
118 }
119
120 }
121
122 if (index_stem == null || index_stem.equals("")) {
123 index_stem = this.cluster_name; // index_stem is the name of the db in indext/text, it is <colname>.<db>
124 }
125 if (infodb_type == null || infodb_type.equals("")) {
126 infodb_type = "gdbm"; // the default
127 }
128
129 coll_db = new SimpleCollectionDatabase(infodb_type);
130 if (!coll_db.databaseOK()) {
131 logger.error("Couldn't create the collection database of type "+infodb_type);
132 return false;
133 }
134
135 oaiinf_db = new SimpleCollectionDatabase(infodb_type);
136 if (!oaiinf_db.databaseOK()) {
137 logger.error("Couldn't create the oai-inf database of type "+infodb_type);
138 return false;
139 }
140
141
142 // Open databases for querying
143 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, index_stem, infodb_type);
144 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
145 logger.error("Could not open collection database!");
146 return false;
147 }
148 // the oaiinf_db is called oai-inf.<infodb_type_extension>
149 String oaiinf_db_file = GSFile.OAIInfoDatabaseFile(this.site_home, this.cluster_name, "oai-inf", infodb_type);
150 File oaiinfFile = new File(oaiinf_db_file);
151
152 if(!oaiinfFile.exists()) {
153 logger.warn("oai-inf database for collection + " + this.cluster_name + " does not exist.");
154 oaiinf_db = null;
155 } else if (!this.oaiinf_db.openDatabase(oaiinf_db_file, SimpleCollectionDatabase.READ)) {
156 logger.warn("Could not open oai-inf database for collection + " + this.cluster_name + "!");
157 oaiinf_db = null;
158 }
159
160 // work out what sets this collection has. Will usually contain the collection itself, optional super collection, and maybe subcolls if appropriate classifiers are present.
161 configureSetInfo();
162 // the short_service_info is used by the message router to find the method names,
163
164 Element list_records = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
165 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
166 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
167 this.short_service_info.appendChild(list_records);
168
169 Element list_identifiers = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
170 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
171 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
172 this.short_service_info.appendChild(list_identifiers);
173
174 Element list_sets = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
175 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
176 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
177 this.short_service_info.appendChild(list_sets);
178
179 Element list_metadata_formats = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
180 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
181 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
182 this.short_service_info.appendChild(list_metadata_formats);
183
184 Element get_record = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
185 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
186 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
187 this.short_service_info.appendChild(get_record);
188
189 return true;
190 }
191
192 public boolean configureOAI(Element oai_config_elem) {
193 this.meta_formats_definition = this.desc_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
194 this.format_response_map = new HashMap<String, Element>();
195 this.format_elements_map = new HashMap<String, HashSet<String>>();
196 this.format_meta_elem_map = new HashMap<String, Element>();
197
198 // for now, all we want is the metadata prefix description and the mapping list
199 Element main_lmf_elem = (Element) GSXML.getChildByTagName(oai_config_elem, OAIXML.LIST_METADATA_FORMATS);
200 if (main_lmf_elem == null) {
201 logger.error("No listMetadataFormats element found in OAIConfig.xml");
202 return false;
203 }
204 NodeList meta_formats_list = this.config_info.getElementsByTagName(OAIXML.METADATA_FORMAT);
205 if (meta_formats_list.getLength() == 0) {
206 logger.error("no metadataFormat elements found in OAIPMH serviceRack element");
207 return false;
208 }
209
210 boolean found_meta_format = false;
211 for(int i=0; i<meta_formats_list.getLength(); i++) {
212 Element mf = (Element) meta_formats_list.item(i);
213 String prefix = mf.getAttribute(OAIXML.METADATA_PREFIX);
214 if (prefix.equals("")) {
215 logger.error("metadataFormat element had no metadataPrefix attribute");
216 continue;
217 }
218 // get the right format from OAICOnfig
219 Element meta_format = findNamedMetadataFormat(main_lmf_elem, prefix);
220 if (meta_format == null) {
221 logger.error("Couldn't find metadataFormat named "+prefix+" in OAIConfig.xml");
222 continue;
223 }
224
225 // copy the format definition into our stored Element
226 Element collection_version_format = (Element) this.desc_doc.importNode(meta_format, true);
227 collection_version_format.setAttribute(GSXML.NAME_ATT, prefix); // for convenience
228 this.meta_formats_definition.appendChild(collection_version_format);
229 // set up the response element for this format
230 format_response_map.put(prefix, OAIXML.getMetadataFormatShort(this.desc_doc, collection_version_format));
231 // add in collection specific mappings
232 addCollectionMappings(collection_version_format, mf);
233 // now set up a list of all collection elements for reverse lookup of the mapping
234 format_elements_map.put(prefix, getAllCollectionElements(collection_version_format));
235 format_meta_elem_map.put(prefix, OAIXML.getMetadataPrefixElement(this.desc_doc, prefix, collection_version_format));
236
237 }
238 return true;
239 }
240
241 /**
242 * @return the associated OAICollection's OAI_EARLIEST_TIMESTAMP_OID record's
243 * OAI_INF_TIMESTAMP field from the collection's oai-inf.db IN MILLISECONDS
244 */
245 public long getEarliestTimestamp() {
246 long timestamp = -1;
247
248 DBInfo oai_info = null;
249 if(oaiinf_db != null) {
250 // get internal record containing the earliest timestamp of the collection
251 oai_info = this.oaiinf_db.getInfo(OAIXML.OAI_EARLIEST_TIMESTAMP_OID);
252 if (oai_info == null) {
253 logger.warn("Can't get collection " + this.cluster_name + "'s earliest timestamp from oai-inf db. No entry for 'OID' " + OAIXML.OAI_EARLIEST_TIMESTAMP_OID + " in the db.");
254 } else {
255 timestamp = Long.parseLong(oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP)) * 1000; // stored in seconds, so x1000 to convert to milliseconds
256 //logger.info("@@@ found earliest OAI timestamp for collection " + this.coll_name + ": " + timestamp + " (ms)");
257 }
258 }
259 return timestamp;
260 }
261
262 protected Element findNamedMetadataFormat(Element list_meta_formats, String prefix) {
263 NodeList formats = list_meta_formats.getElementsByTagName(OAIXML.METADATA_FORMAT);
264 for (int i=0; i<formats.getLength(); i++) {
265 Element format = (Element)formats.item(i);
266 String meta_name = GSXML.getNodeText((Element)GSXML.getChildByTagName(format, OAIXML.METADATA_PREFIX));
267 if (prefix.equals(meta_name)) {
268 return format;
269 }
270 }
271 return null;
272 }
273
274 /** goes through the mappings from the collection one, and replaces existing ones in the main one */
275 protected void addCollectionMappings(Element main_meta_format, Element coll_meta_format) {
276
277 Element element_list = (Element)GSXML.getChildByTagName(main_meta_format, OAIXML.ELEMENT+GSXML.LIST_MODIFIER);
278 Document doc = element_list.getOwnerDocument();
279 NodeList coll_elements = coll_meta_format.getElementsByTagName(OAIXML.ELEMENT);
280 if (coll_elements.getLength()==0) {
281 // no mappings to include
282 return;
283 }
284 for (int i=0; i<coll_elements.getLength(); i++) {
285 Element e = (Element)coll_elements.item(i);
286 String elem_name = e.getAttribute(GSXML.NAME_ATT);
287 Element main_elem = GSXML.getNamedElement(element_list, OAIXML.ELEMENT, GSXML.NAME_ATT, elem_name);
288 if (main_elem == null) {
289 logger.error(elem_name+" not found in meta format, not using it");
290 } else {
291 element_list.replaceChild(doc.importNode(e, true),main_elem );
292 }
293 }
294 }
295
296 /** goes through all the mappings and makes a set of all collection
297 metadata names that could become an oai meta element - acts as
298 a reverse lookup for the mappings */
299 protected HashSet<String> getAllCollectionElements(Element meta_format) {
300 HashSet<String> meta_name_set = new HashSet<String>();
301 NodeList elements = meta_format.getElementsByTagName(OAIXML.ELEMENT);
302 for (int i=0; i<elements.getLength(); i++) {
303 Element e = (Element)elements.item(i);
304 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
305 if (map == null) {
306 // there is no mapping, just use the element name
307 meta_name_set.add(e.getAttribute(GSXML.NAME_ATT));
308 } else {
309 String list_of_names = map.getAttribute(OAIXML.ELEMENTS);
310 String[] name_array = list_of_names.split(",");
311 for (int j=0; j<name_array.length; j++) {
312 meta_name_set.add(name_array[j]);
313 }
314 }
315 }
316 return meta_name_set;
317 }
318
319 /** returns a specific service description */
320 public Element getServiceDescription(Document doc, String service_id, String lang, String subset) {
321
322 if (service_id.equals(OAIXML.LIST_RECORDS)) {
323 Element list_records = doc.createElement(GSXML.SERVICE_ELEM);
324 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
325 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
326 return list_records;
327 }
328
329 if (service_id.equals(OAIXML.LIST_IDENTIFIERS)) {
330 Element list_identifiers = doc.createElement(GSXML.SERVICE_ELEM);
331 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
332 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
333 return list_identifiers;
334 }
335 if (service_id.equals(OAIXML.LIST_SETS)) {
336 Element list_sets = doc.createElement(GSXML.SERVICE_ELEM);
337 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
338 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
339 return list_sets;
340 }
341 if (service_id.equals(OAIXML.LIST_METADATA_FORMATS)) {
342 Element list_metadata_formats = doc.createElement(GSXML.SERVICE_ELEM);
343 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
344 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
345 return list_metadata_formats;
346 }
347
348 if (service_id.equals(OAIXML.GET_RECORD)) {
349 Element get_record = doc.createElement(GSXML.SERVICE_ELEM);
350 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
351 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
352 return get_record;
353 }
354
355 return null;
356 }
357
358 /** The list sets service returns all the sets that this collection is/is part of/contains. This is gathered by Receptionist from all collections to answer the OAI ListSets request. */
359 protected Element processListSets(Element req) {
360 return list_sets_response;
361 }
362 /** returns the actual record element used in the OAI GetRecord response */
363 protected Element processGetRecord(Element req) {
364 /** arguments:
365 identifier: required
366 metadataPrefix: required
367 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
368 */
369 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
370 HashMap<String, String> param_map = GSXML.getParamMap(params);
371
372 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
373 if (prefix == null || prefix.equals("")) {
374 //Just a double-check
375 logger.error("the value of metadataPrefix att is not present in the request.");
376 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
377 }
378
379 // check that we support this format
380 if (!format_response_map.containsKey(prefix)) {
381 logger.error("metadata prefix is not supported for collection "+this.coll_name);
382 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
383 }
384
385 Document doc = XMLConverter.newDOM();
386
387 String oid = param_map.get(OAIXML.OID); // TODO should this be identifier???
388 boolean OID_is_deleted = false;
389 long millis = -1;
390
391 DBInfo oai_info = null;
392 if(oaiinf_db != null) {
393 oai_info = this.oaiinf_db.getInfo(oid);
394 if (oai_info == null) {
395 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
396 } else {
397
398 // indexdb doesn't have info on deleted docs, only oaiinf db does.
399 // So only oaiinfdb has timestamps for deleted docs
400 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
401 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
402 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds, like oailastmodified in the collection index db
403 millis = Long.parseLong(timestamp)*1000; // in milliseconds
404
405 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
406 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
407 OID_is_deleted = true;
408 }
409 }
410 }
411
412 //get a DBInfo object of the identifier; if this identifier is not present in the database,
413 // null is returned.
414 DBInfo info = this.coll_db.getInfo(oid);
415 if (info == null) {
416 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
417 logger.error("OID: " + oid + " is not present in the collection index database.");
418 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
419 } // if doc deleted, id missing in indexdb is not an error: doc id would exist only in oai-inf db, marked as deleted 'D'
420 }
421 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
422 millis = getDateStampMillis(info);
423 }
424 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
425
426
427 Element get_record_response = doc.createElement(GSXML.RESPONSE_ELEM);
428 Element get_record = doc.createElement(OAIXML.GET_RECORD);
429 get_record_response.appendChild(get_record);
430 Element record = doc.createElement(OAIXML.RECORD);
431 //compose the header element
432 record.appendChild(createHeaderElement(doc, oid, oailastmodified, OID_is_deleted));
433 if(!OID_is_deleted) {
434 //compose the metadata element
435 record.appendChild(createMetadataElement(doc, prefix, info));
436 }
437 get_record.appendChild(record);
438 return get_record_response;
439 }
440
441 /** return a list of records in specified set, containing metadata from specified prefix*/
442 protected Element processListRecords(Element req) {
443 return processListIdentifiersOrRecords(req, OAIXML.LIST_RECORDS, true);
444 }
445
446 /** return a list of identifiers in specified set that contain metadata belonging to specified prefix. */
447 protected Element processListIdentifiers(Element req) {
448 return processListIdentifiersOrRecords(req, OAIXML.LIST_IDENTIFIERS, false);
449 }
450
451 // Get a list of records/identifiers that match the parameters.
452 protected Element processListIdentifiersOrRecords(Element req, String response_name, boolean include_metadata) {
453 /** arguments:
454 metadataPrefix: required
455 * from: optional
456 * until: optional
457 * set: optional
458 * resumptionToken: exclusive and optional (ignored as it has been handled by OAIReceptionist)
459 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
460 */
461 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
462
463 if(params.getLength() == 0) {
464 logger.error("must at least have the metadataPrefix parameter, can't be none");
465 return OAIXML.createErrorResponse(OAIXML.BAD_ARGUMENT, "");
466 }
467
468 HashMap<String, String> param_map = GSXML.getParamMap(params);
469
470 String prefix = "";
471 Date from_date = null;
472 Date until_date = null;
473
474 if(param_map.containsKey(OAIXML.METADATA_PREFIX) == false) {
475 //Just a double-check
476 logger.error("A param element containing the metadataPrefix is not present.");
477 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
478 }
479 prefix = param_map.get(OAIXML.METADATA_PREFIX);
480 if (prefix == null || prefix.equals("")) {
481 //Just a double-check
482 logger.error("the value of metadataPrefix att is not present in the request.");
483 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
484 }
485
486 if(param_map.containsKey(OAIXML.FROM)) {
487 String from = param_map.get(OAIXML.FROM);
488 from_date = OAIXML.getDate(from);
489 }
490 if(param_map.containsKey(OAIXML.UNTIL)) {
491 String until = param_map.get(OAIXML.UNTIL);
492 until_date = OAIXML.getDate(until);
493 }
494
495 if (!format_response_map.containsKey(prefix)) {
496 logger.error(prefix + " metadata prefix is not supported for collection "+this.coll_name);
497 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
498 }
499
500 // get list of oids
501 ArrayList<String> oid_list = null;
502 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
503 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
504
505 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
506 logger.warn("@@@@@@@@@@@@@ NO OIDs in oai-inf db for " + this.cluster_name);
507 oid_list = getChildrenIds(OAIXML.BROWSELIST);
508 }
509 }
510
511 if (oid_list == null) {
512 logger.error("No matched records found in collection: oai-inf and index db's browselist are empty");
513 return OAIXML.createErrorResponse(OAIXML.NO_RECORDS_MATCH, "");
514 }
515 // all validation is done
516
517 // get the list of elements that are in this metadata prefix
518 HashSet<String> set_of_elems = format_elements_map.get(prefix);
519
520 Document doc = XMLConverter.newDOM();
521 Element list_items_response = doc.createElement(GSXML.RESPONSE_ELEM);
522 Element list_items = doc.createElement(response_name);
523 list_items_response.appendChild(list_items);
524
525 for(int i=0; i<oid_list.size(); i++) {
526 String oid = oid_list.get(i);
527
528 if(oid.equals(OAIXML.OAI_EARLIEST_TIMESTAMP_OID)) { // internal id not doc id, so skip
529 continue;
530 }
531
532 boolean OID_is_deleted = false;
533 long millis = -1;
534
535 DBInfo oai_info = null;
536 if(oaiinf_db != null) {
537 oai_info = this.oaiinf_db.getInfo(oid);
538 if (oai_info == null) {
539 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
540 } else {
541
542 // indexdb doesn't have info on deleted docs, only oaiinf db does.
543 // So only oaiinfdb has timestamps for deleted docs
544 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
545 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
546 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds like oailastmodified in the collection index db
547 millis = Long.parseLong(timestamp)*1000; // in milliseconds
548
549 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
550 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
551 OID_is_deleted = true;
552 }
553 }
554 }
555 DBInfo info = this.coll_db.getInfo(oid);
556 if (info == null) { // can happen if oid was deleted, in which case only oai_info keeps a record of the oid
557 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
558 logger.error("Collection database does not contain information about oid: " +oid);
559 }
560 }
561 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
562
563 millis = getDateStampMillis(info);
564 }
565
566 Date this_date = null;
567 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
568
569 if (millis == -1) {
570 if (from_date != null || until_date !=null) {
571 continue; // if this doc doesn't have a date for some reason, and
572 // we are doing a date range, then don't include it.
573 }
574 } else {
575 this_date = new Date(millis);
576 if (from_date != null) {
577 if(this_date.before(from_date)) {
578 continue;
579 }
580 }
581 if (until_date != null) {
582 if (this_date.after(until_date)) {
583 continue;
584 }
585 }
586 }
587
588 //compose the header element, which we'll be appending no matter what
589 Element header = createHeaderElement(doc, oid, oailastmodified, OID_is_deleted);
590
591 if (include_metadata) { // doing ListRecords
592 // compose a record for adding header and metadata
593 Element record = doc.createElement(OAIXML.RECORD);
594 list_items.appendChild(record);
595 //insert the header element
596 record.appendChild(header);
597 //Now check that this id has metadata for the required prefix.
598 if (info != null && documentContainsMetadata(info, set_of_elems)) {
599 // YES, it does have some metadata for this prefix
600 //compose the metadata element
601 record.appendChild(createMetadataElement(doc, prefix, info));
602 } // otherwise the oid was 'deleted' and only in the oai-inf db and not in the info (collection index) db
603 } else { // doing ListIdentifiers
604 //append the header element
605 list_items.appendChild(header);
606 }
607
608 }//end of for(int i=0; i<oid_list.size(); i++) of doing thru each record
609
610 return list_items_response;
611
612 }
613
614
615 // have implemented setDescription as an element, instead of a container containing metadata
616 private boolean configureSetInfo() {
617
618 Document doc = XMLConverter.newDOM();
619 this.list_sets_response = doc.createElement(GSXML.RESPONSE_ELEM);
620 Element list_sets_elem = doc.createElement(OAIXML.LIST_SETS);
621 this.list_sets_response.appendChild(list_sets_elem);
622 String set_name = this.coll_name;
623 String set_description = null;
624 Element name_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_NAME);
625 if (name_elem!=null) {
626 set_name = GSXML.getNodeText(name_elem);
627 if (set_name.equals("")) {
628 set_name = this.coll_name; // default to coll name if can't find one
629 }
630 }
631 Element description_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_DESCRIPTION);
632 if (description_elem!=null) {
633 set_description = GSXML.getNodeText(description_elem);
634 if (set_description.equals("")) {
635 set_description = null;
636 }
637 }
638 Element coll_set = OAIXML.createSet(doc, this.coll_name, set_name, set_description);
639 list_sets_elem.appendChild(coll_set);
640
641 // are we part of any super sets?
642 NodeList super_set_list = GSXML.getChildrenByTagName(this.config_info, OAIXML.OAI_SUPER_SET);
643 for (int i=0; i<super_set_list.getLength(); i++) {
644 String super_name = ((Element)super_set_list.item(i)).getAttribute(GSXML.NAME_ATT);
645 if (super_name != null && !super_name.equals("")) {
646 list_sets_elem.appendChild(OAIXML.createSet(doc, super_name, super_name, null));
647 }
648 }
649 return true;
650 }
651
652 /** create the metadata element used when processing ListRecords/GetRecord requests
653 */
654 protected Element createMetadataElement(Document doc, String prefix, DBInfo info) {
655 // the <metadata> element
656 Element metadata = doc.createElement(OAIXML.METADATA);
657 // the <oai:dc namespace...> element
658 Element prfx_str_elem = (Element)doc.importNode(this.format_meta_elem_map.get(prefix), true);
659 metadata.appendChild(prfx_str_elem);
660
661 Element meta_format_element = GSXML.getNamedElement(this.meta_formats_definition, OAIXML.METADATA_FORMAT, GSXML.NAME_ATT, prefix);
662 NodeList elements = meta_format_element.getElementsByTagName(OAIXML.ELEMENT);
663 // for each element in the definition
664 for (int i=0; i<elements.getLength(); i++) {
665 Element e = (Element)elements.item(i);
666 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
667 if (map == null) {
668 // look up the element name
669 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), info);
670 } else {
671 // we go though the list of names in the mapping
672 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), map.getAttribute(OAIXML.SELECT), map.getAttribute(OAIXML.ELEMENTS), info);
673 }
674 }
675 // output any metadata that is not just a simple mapping
676 addCustomMetadata(prfx_str_elem, prefix, info);
677 return metadata;
678 }
679
680 /** a simple addMetadata where we look for meta_name metadata, and add as that name*/
681 protected void addMetadata(Element meta_list_elem, String meta_name, DBInfo info) {
682 Vector<String> values = info.getMultiInfo(meta_name);
683 if (values != null && values.size()!=0) {
684 for (int i=0; i<values.size(); i++) {
685 addMetadataElement(meta_list_elem, meta_name, values.get(i));
686 }
687 }
688 }
689
690 /** more complicated addMetadata - can add multiple items. */
691 protected void addMetadata(Element meta_list_elem, String new_meta_name, String select_type, String name_list, DBInfo info) {
692 String[] names = name_list.split(",");
693 for (int i=0; i<names.length; i++) {
694 Vector<String> values;
695 // some special words
696 if (names[i].startsWith(OAIXML.GSF_LINK_PREFIX)) {
697 values = new Vector<String>();
698 String base_url = OAIXML.getBaseURL(); // e.g. e.g. http://host:port/greenstone3/oaiserver
699 String link_url = null;
700 if (names[i].equals(OAIXML.GSF_LINK_PREFIX+OAIXML.LINK_TYPE_DOCUMENT)) {
701 link_url = base_url.replace("oaiserver", "library") + "/collection/" + this.coll_name + "/document/" + info.getInfo("Identifier");
702 } else if (names[i].equals(OAIXML.GSF_LINK_PREFIX+OAIXML.LINK_TYPE_SOURCE)) {
703 String srcfile = info.getInfo("srclinkFile");
704 if (!srcfile.equals("")) {
705 link_url = base_url.replace("oaiserver", "") + "sites/"
706 + this.site_name
707 + "/collect/" + this.coll_name + "/index/assoc/"
708 + info.getInfo("assocfilepath") + "/" + srcfile;
709 }
710 }
711 if (link_url !=null) {
712 values.add(link_url);
713 }
714 } else {
715 values = info.getMultiInfo(names[i]);
716 }
717 if (values == null || values.size()==0) {
718 continue;
719 }
720 for (int j=0; j<values.size(); j++) {
721 addMetadataElement(meta_list_elem, new_meta_name, values.get(j));
722 if (select_type.equals(OAIXML.SELECT_SINGLE_VALUE)) {
723 return; // only want to add one value
724 }
725 }
726 if (select_type.equals(OAIXML.SELECT_FIRST_VALID_META)) {
727 return; // we have added all values of this meta elem
728 }
729 // otherwise, we will keep going through the list and add them all.
730 }
731 }
732
733 // specific metadata formats might need to do some custom metadata that is not
734 //just a standard mapping.
735 protected void addCustomMetadata(Element meta_list_elem, String prefix, DBInfo info) {
736
737
738 }
739
740 /** create the actual metadata element for the list */
741 protected void addMetadataElement(Element meta_list_elem, String name, String value) {
742
743 Element meta = GSXML.createTextElement(meta_list_elem.getOwnerDocument(), name, value);
744 meta_list_elem.appendChild(meta);
745 }
746
747
748 /** create a header element used when processing requests like ListRecords/GetRecord/ListIdentifiers
749 */
750 protected Element createHeaderElement(Document doc, String oid, String oailastmodified, boolean deleted) {
751
752 Element header = doc.createElement(OAIXML.HEADER);
753
754 // if deleted, get the date and change oailastmodified to timestamp in oaiinfo
755 if(deleted) {
756 header.setAttribute(OAIXML.OAI_INF_STATUS, OAIXML.HEADER_STATUS_ATTR_DELETED); // set the header status to deleted
757 // then the timestamp for deletion will be from oai-inf database
758 }
759
760 Element identifier = doc.createElement(OAIXML.IDENTIFIER);
761 GSXML.setNodeText(identifier, coll_name + ":" + oid);
762 header.appendChild(identifier);
763 Element set_spec = doc.createElement(OAIXML.SET_SPEC);
764 GSXML.setNodeText(set_spec, coll_name);
765 header.appendChild(set_spec);
766 Element datestamp = doc.createElement(OAIXML.DATESTAMP);
767 GSXML.setNodeText(datestamp, oailastmodified);
768 header.appendChild(datestamp);
769 return header;
770 }
771
772 /** return the metadata information */
773 protected Element processListMetadataFormats(Element req) {
774 // the request sent here must contain an OID. see doListMetadataFormats() in OAIReceptionist
775 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.OID);
776 if (param == null) {
777 logger.error("An element containing the OID attribute not is present.");
778 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
779 }
780 String oid = param.getAttribute(GSXML.VALUE_ATT);
781 if (oid == null || oid.equals("")) {
782 logger.error("No OID is present in the request.");
783 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
784 }
785
786 /*
787 ArrayList<String> oid_list = null;
788 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
789 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
790
791 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
792 oid_list = getChildrenIds(OAIXML.BROWSELIST);
793 }
794 }
795 */
796 // assume meta formats are only for OIDs that have not been deleted
797 // so don't need to check oai-inf db, and can just check collection's index db for list of OIDs
798 ArrayList<String> oid_list = getChildrenIds(OAIXML.BROWSELIST);
799 if (oid_list == null || oid_list.contains(oid) == false) {
800 logger.error("OID: " + oid + " is not present in the database.");
801 Element e= OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
802// logger.error((new XMLConverter()).getPrettyString (e));
803 return e;
804 }
805
806 DBInfo info = null;
807 info = this.coll_db.getInfo(oid);
808 if (info == null) { //just double check
809 return OAIXML.createErrorResponse(OAIXML.OAI_SERVICE_UNAVAILABLE, "");
810 }
811
812 Document doc = XMLConverter.newDOM();
813 Element list_metadata_formats_response = doc.createElement(GSXML.RESPONSE_ELEM);
814
815 Element list_metadata_formats = doc.createElement(OAIXML.LIST_METADATA_FORMATS);
816 list_metadata_formats_response.appendChild(list_metadata_formats);
817 boolean has_meta_format = false;
818
819 // for each format in format_elements_map
820 Iterator<String> it = format_elements_map.keySet().iterator();
821 while (it.hasNext()) {
822 String format = it.next();
823 HashSet<String> set_of_elems = format_elements_map.get(format);
824 if (documentContainsMetadata(info, set_of_elems)) {
825 // add this format into the response
826 has_meta_format = true;
827 list_metadata_formats.appendChild(doc.importNode(format_response_map.get(format), true));
828 }
829 }
830
831 if (has_meta_format == false) {
832 logger.error("Specified metadata names are not contained in the database.");
833 return OAIXML.createErrorResponse(OAIXML.NO_METADATA_FORMATS, "");
834 } else {
835 return list_metadata_formats_response;
836 }
837 }
838
839 protected boolean documentContainsMetadata(DBInfo info, HashSet<String> set_of_elems) {
840 if (set_of_elems.size() == 0) {
841 return false;
842 }
843 Iterator<String> i = set_of_elems.iterator();
844 while (i.hasNext()) {
845 if (!info.getInfo(i.next()).equals("")) {
846 return true;
847 }
848 }
849 return false;
850 }
851
852 /** returns a list of the child ids in order, null if no children */
853 protected ArrayList<String> getChildrenIds(String node_id) {
854 DBInfo info = this.coll_db.getInfo(node_id);
855 if (info == null) {
856 return null;
857 }
858
859 String contains = info.getInfo("contains");
860 if (contains.equals("")) {
861 return null;
862 }
863 ArrayList<String> children = new ArrayList<String>();
864 StringTokenizer st = new StringTokenizer(contains, ";");
865 while (st.hasMoreTokens()) {
866 String child_id = st.nextToken().replaceAll("\"", node_id);
867 children.add(child_id);
868 }
869 return children;
870 }
871 /**method to check whether any of the 'metadata_names' is contained in the 'info'.
872 * The name may be in the form: <name>,<mapped name>, in which the mapped name is
873 * optional. The mapped name is looked up in the DBInfo; if not present, use the first
874 * name which is mandatory.
875 */
876 protected boolean containsMetadata(DBInfo info, String[] metadata_names) {
877 if (metadata_names == null) return false;
878 logger.info("checking metadata names in db.");
879 for(int i=0; i<metadata_names.length; i++) {
880 int index = metadata_names[i].indexOf(",");
881 String meta_name = (index == -1) ? metadata_names[i] :
882 metadata_names[i].substring(index + 1);
883
884 if(info.getInfo(meta_name).equals("") == false) {
885 return true;
886 }
887 }
888 return false;
889 }
890
891 protected long getDateStampMillis(DBInfo info) {
892 // gs.OAIDateStamp is in YYYY-MM-DD
893 String time_stamp = info.getInfo(OAIXML.GS_OAI_DATE_STAMP);
894 long millis = -1;
895 if (!time_stamp.equals("")) {
896 millis = OAIXML.getTime(time_stamp);
897 }
898 if (millis == -1) {
899 // oailastmodified is in seconds
900 time_stamp = info.getInfo(OAIXML.OAI_LASTMODIFIED);
901 if (!time_stamp.equals("")) {
902 millis = Long.parseLong(time_stamp)*1000;
903 }
904 }
905 return millis;
906
907
908 }
909}
910
911
Note: See TracBrowser for help on using the repository browser.