source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/OAIPMH.java

Last change on this file was 35225, checked in by kjdon, 3 years ago

OAI identifiers should be (I think??) oai:repository_id:coll_name:doc_id. So I have updated the code to use this instead of just coll_name:doc_id

File size: 37.6 KB
Line 
1/*
2 * OAIPMH.java
3 * Copyright (C) 2010 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.OAIXML;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.GSFile;
27import org.greenstone.gsdl3.util.XMLConverter;
28
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43import java.util.Date;
44import java.util.HashMap;
45import java.util.HashSet;
46import java.util.Map.Entry;
47
48import org.apache.log4j.Logger;
49
50/** Implements the oai metadata retrieval service for GS3 collections.
51 * Dig into each collection's database and retrieve the metadata
52 *
53 */
54
55public class OAIPMH extends ServiceRack {
56
57 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.OAIPMH.class.getName());
58
59 protected SimpleCollectionDatabase coll_db = null;
60 protected SimpleCollectionDatabase oaiinf_db = null;
61
62 protected String repository_id = "";
63 protected String site_name = "";
64 protected String coll_name = "";
65
66 // set this up during configure
67 protected Element list_sets_response = null;
68
69 protected Element meta_formats_definition = null;
70 protected HashMap<String, HashSet<String>> format_elements_map = null;
71 protected HashMap<String, Element> format_response_map = null;
72 protected HashMap<String, Element> format_meta_elem_map = null;
73
74 protected String index_stem = "";
75 protected String infodb_type = "";
76
77 /** constructor */
78 public OAIPMH() {
79
80 }
81
82 public void cleanUp() {
83 super.cleanUp();//??
84
85 if(this.coll_db != null) {
86 this.coll_db.closeDatabase();
87 this.coll_db = null;
88 }
89 if (this.oaiinf_db != null){
90 this.oaiinf_db.closeDatabase();
91 }
92 }
93
94 /** configure this service
95 info is the OAIPMH service rack from collectionConfig.xml, and
96 extra_info is buildConfig.xml */
97 public boolean configure(Element info, Element extra_info) {
98 if (!super.configure(info, extra_info)){
99 logger.info("Configuring ServiceRack.java returns false.");
100 return false;
101 }
102
103 //get the names from ServiceRack.java
104 this.site_name = this.router.getSiteName();
105 this.coll_name = this.cluster_name;
106
107 logger.info("Configuring OAIPMH...");
108
109 this.config_info = info;
110
111 // the index stem is either specified in the buildConfig.xml file (extra_info) or uses the collection name
112 Element metadata_list = (Element) GSXML.getChildByTagName(extra_info, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
113
114 if (metadata_list != null) {
115
116 Element index_stem_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "indexStem");
117
118 if (index_stem_elem != null) {
119 this.index_stem = GSXML.getNodeText(index_stem_elem);
120 }
121
122 Element infodb_type_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "infodbType");
123 if (infodb_type_elem != null) {
124 this.infodb_type = GSXML.getNodeText(infodb_type_elem);
125 }
126
127 }
128
129 if (index_stem == null || index_stem.equals("")) {
130 this.index_stem = this.cluster_name; // index_stem is the name of the db in indext/text, it is <colname>.<db>
131 }
132 if (infodb_type == null || infodb_type.equals("")) {
133 this.infodb_type = "gdbm"; // the default
134 }
135
136 // DB OPENING STUFF MOVED TO configureOAI(), because OAIPMH.configure() is called by the regular MessageRouter when this activates collections for the regular "library" servlet
137 // whereas OAIPMH.configureOAI() is only called by OAIMessageRouter when it activates collections for the "oaiserver" servlet (after OAIMessageRouter calls regular configure() first)
138 // We don't want the DBs opened twice: once by MessageRouter's call to OAIPMH.configure() and once by OAIMessageRouter calling OAIPMH.configure().
139
140 // work out what sets this collection has. Will usually contain the collection itself, optional super collection, and maybe subcolls if appropriate classifiers are present.
141 configureSetInfo();
142 // the short_service_info is used by the message router to find the method names,
143
144 Element list_records = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
145 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
146 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
147 this.short_service_info.appendChild(list_records);
148
149 Element list_identifiers = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
150 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
151 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
152 this.short_service_info.appendChild(list_identifiers);
153
154 Element list_sets = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
155 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
156 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
157 this.short_service_info.appendChild(list_sets);
158
159 Element list_metadata_formats = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
160 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
161 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
162 this.short_service_info.appendChild(list_metadata_formats);
163
164 Element get_record = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
165 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
166 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
167 this.short_service_info.appendChild(get_record);
168
169 return true;
170 }
171
172 public boolean configureOAI(Element oai_config_elem) {
173 this.meta_formats_definition = this.desc_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
174 this.format_response_map = new HashMap<String, Element>();
175 this.format_elements_map = new HashMap<String, HashSet<String>>();
176 this.format_meta_elem_map = new HashMap<String, Element>();
177
178 // get the reposityIdentifier
179 Element ri = (Element)GSXML.getChildByTagName(oai_config_elem, OAIXML.REPOSITORY_IDENTIFIER);
180 if (ri != null) {
181 this.repository_id = GSXML.getNodeText(ri);
182 } else {
183 this.repository_id = "";
184 }
185
186 // for now, all we want is the metadata prefix description and the mapping list
187 Element main_lmf_elem = (Element) GSXML.getChildByTagName(oai_config_elem, OAIXML.LIST_METADATA_FORMATS);
188 if (main_lmf_elem == null) {
189 logger.error("No listMetadataFormats element found in OAIConfig.xml");
190 return false;
191 }
192 NodeList meta_formats_list = this.config_info.getElementsByTagName(OAIXML.METADATA_FORMAT);
193 if (meta_formats_list.getLength() == 0) {
194 logger.error("no metadataFormat elements found in OAIPMH serviceRack element");
195 return false;
196 }
197
198 boolean found_meta_format = false;
199 for(int i=0; i<meta_formats_list.getLength(); i++) {
200 Element mf = (Element) meta_formats_list.item(i);
201 String prefix = mf.getAttribute(OAIXML.METADATA_PREFIX);
202 if (prefix.equals("")) {
203 logger.error("metadataFormat element had no metadataPrefix attribute");
204 continue;
205 }
206 // get the right format from OAIConfig
207 Element meta_format = findNamedMetadataFormat(main_lmf_elem, prefix);
208 if (meta_format == null) {
209 logger.error("Couldn't find metadataFormat named "+prefix+" in OAIConfig.xml");
210 continue;
211 }
212
213 // copy the format definition into our stored Element
214 Element collection_version_format = (Element) this.desc_doc.importNode(meta_format, true);
215 collection_version_format.setAttribute(GSXML.NAME_ATT, prefix); // for convenience
216 this.meta_formats_definition.appendChild(collection_version_format);
217 // set up the response element for this format
218 format_response_map.put(prefix, OAIXML.getMetadataFormatShort(this.desc_doc, collection_version_format));
219 // add in collection specific mappings
220 addCollectionMappings(collection_version_format, mf);
221 // now set up a list of all collection elements for reverse lookup of the mapping
222 format_elements_map.put(prefix, getAllCollectionElements(collection_version_format));
223 format_meta_elem_map.put(prefix, OAIXML.getMetadataPrefixElement(this.desc_doc, prefix, collection_version_format));
224
225 } // end for
226
227 // Open the coll db and oai-inf db databases and store handles to them
228 coll_db = new SimpleCollectionDatabase(infodb_type);
229 if (!coll_db.databaseOK()) {
230 logger.error("Couldn't create the collection database of type "+infodb_type);
231 return false;
232 }
233
234 oaiinf_db = new SimpleCollectionDatabase(infodb_type);
235 if (!oaiinf_db.databaseOK()) {
236 logger.error("Couldn't create the oai-inf database of type "+infodb_type);
237 return false;
238 }
239
240
241 // Open databases for querying
242 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, index_stem, infodb_type);
243 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
244 logger.error("Could not open collection database!");
245 return false;
246 }
247 // the oaiinf_db is called oai-inf.<infodb_type_extension>
248 String oaiinf_db_file = GSFile.OAIInfoDatabaseFile(this.site_home, this.cluster_name, "oai-inf", infodb_type);
249 File oaiinfFile = new File(oaiinf_db_file);
250
251 if(!oaiinfFile.exists()) {
252 logger.warn("oai-inf database for collection + " + this.cluster_name + " does not exist.");
253 oaiinf_db = null;
254 } else if (!this.oaiinf_db.openDatabase(oaiinf_db_file, SimpleCollectionDatabase.READ)) {
255 logger.warn("Could not open oai-inf database for collection + " + this.cluster_name + "!");
256 oaiinf_db = null;
257 }
258
259 return true;
260 }
261
262 /**
263 * @return the associated OAICollection's OAI_EARLIEST_TIMESTAMP_OID record's
264 * OAI_INF_TIMESTAMP field from the collection's oai-inf.db IN MILLISECONDS
265 */
266 public long getEarliestTimestamp() {
267 long timestamp = -1;
268
269 DBInfo oai_info = null;
270 if(oaiinf_db != null) {
271 // get internal record containing the earliest timestamp of the collection
272 oai_info = this.oaiinf_db.getInfo(OAIXML.OAI_EARLIEST_TIMESTAMP_OID);
273 if (oai_info == null) {
274 logger.warn("Can't get collection " + this.cluster_name + "'s earliest timestamp from oai-inf db. No entry for 'OID' " + OAIXML.OAI_EARLIEST_TIMESTAMP_OID + " in the db.");
275 } else {
276 timestamp = Long.parseLong(oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP)) * 1000; // stored in seconds, so x1000 to convert to milliseconds
277 //logger.info("@@@ found earliest OAI timestamp for collection " + this.coll_name + ": " + timestamp + " (ms)");
278 }
279 }
280 return timestamp;
281 }
282
283 protected Element findNamedMetadataFormat(Element list_meta_formats, String prefix) {
284 NodeList formats = list_meta_formats.getElementsByTagName(OAIXML.METADATA_FORMAT);
285 for (int i=0; i<formats.getLength(); i++) {
286 Element format = (Element)formats.item(i);
287 String meta_name = GSXML.getNodeText((Element)GSXML.getChildByTagName(format, OAIXML.METADATA_PREFIX));
288 if (prefix.equals(meta_name)) {
289 return format;
290 }
291 }
292 return null;
293 }
294
295 /** goes through the mappings from the collection one, and replaces existing ones in the main one */
296 protected void addCollectionMappings(Element main_meta_format, Element coll_meta_format) {
297
298 Element element_list = (Element)GSXML.getChildByTagName(main_meta_format, OAIXML.ELEMENT+GSXML.LIST_MODIFIER);
299 Document doc = element_list.getOwnerDocument();
300 NodeList coll_elements = coll_meta_format.getElementsByTagName(OAIXML.ELEMENT);
301 if (coll_elements.getLength()==0) {
302 // no mappings to include
303 return;
304 }
305 for (int i=0; i<coll_elements.getLength(); i++) {
306 Element e = (Element)coll_elements.item(i);
307 String elem_name = e.getAttribute(GSXML.NAME_ATT);
308 Element main_elem = GSXML.getNamedElement(element_list, OAIXML.ELEMENT, GSXML.NAME_ATT, elem_name);
309 if (main_elem == null) {
310 logger.error(elem_name+" not found in meta format, not using it");
311 } else {
312 element_list.replaceChild(doc.importNode(e, true),main_elem );
313 }
314 }
315 }
316
317 /** goes through all the mappings and makes a set of all collection
318 metadata names that could become an oai meta element - acts as
319 a reverse lookup for the mappings */
320 protected HashSet<String> getAllCollectionElements(Element meta_format) {
321 HashSet<String> meta_name_set = new HashSet<String>();
322 NodeList elements = meta_format.getElementsByTagName(OAIXML.ELEMENT);
323 for (int i=0; i<elements.getLength(); i++) {
324 Element e = (Element)elements.item(i);
325 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
326 if (map == null) {
327 // there is no mapping, just use the element name
328 meta_name_set.add(e.getAttribute(GSXML.NAME_ATT));
329 } else {
330 String list_of_names = map.getAttribute(OAIXML.ELEMENTS);
331 String[] name_array = list_of_names.split(",");
332 for (int j=0; j<name_array.length; j++) {
333 meta_name_set.add(name_array[j]);
334 }
335 }
336 }
337 return meta_name_set;
338 }
339
340 /** returns a specific service description */
341 public Element getServiceDescription(Document doc, String service_id, String lang, String subset) {
342
343 if (service_id.equals(OAIXML.LIST_RECORDS)) {
344 Element list_records = doc.createElement(GSXML.SERVICE_ELEM);
345 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
346 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
347 return list_records;
348 }
349
350 if (service_id.equals(OAIXML.LIST_IDENTIFIERS)) {
351 Element list_identifiers = doc.createElement(GSXML.SERVICE_ELEM);
352 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
353 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
354 return list_identifiers;
355 }
356 if (service_id.equals(OAIXML.LIST_SETS)) {
357 Element list_sets = doc.createElement(GSXML.SERVICE_ELEM);
358 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
359 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
360 return list_sets;
361 }
362 if (service_id.equals(OAIXML.LIST_METADATA_FORMATS)) {
363 Element list_metadata_formats = doc.createElement(GSXML.SERVICE_ELEM);
364 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
365 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
366 return list_metadata_formats;
367 }
368
369 if (service_id.equals(OAIXML.GET_RECORD)) {
370 Element get_record = doc.createElement(GSXML.SERVICE_ELEM);
371 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
372 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
373 return get_record;
374 }
375
376 return null;
377 }
378
379 /** The list sets service returns all the sets that this collection is/is part of/contains. This is gathered by Receptionist from all collections to answer the OAI ListSets request. */
380 protected Element processListSets(Element req) {
381 return list_sets_response;
382 }
383 /** returns the actual record element used in the OAI GetRecord response */
384 protected Element processGetRecord(Element req) {
385 /** arguments:
386 identifier: required
387 metadataPrefix: required
388 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
389 */
390 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
391 HashMap<String, String> param_map = GSXML.getParamMap(params);
392
393 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
394 if (prefix == null || prefix.equals("")) {
395 //Just a double-check
396 logger.error("the value of metadataPrefix att is not present in the request.");
397 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
398 }
399
400 // check that we support this format
401 if (!format_response_map.containsKey(prefix)) {
402 logger.error("metadata prefix is not supported for collection "+this.coll_name);
403 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
404 }
405
406 Document doc = XMLConverter.newDOM();
407
408 String oid = param_map.get(OAIXML.OID); // TODO should this be identifier???
409 boolean OID_is_deleted = false;
410 long millis = -1;
411
412 DBInfo oai_info = null;
413 if(oaiinf_db != null) {
414 oai_info = this.oaiinf_db.getInfo(oid);
415 if (oai_info == null) {
416 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
417 } else {
418
419 // indexdb doesn't have info on deleted docs, only oaiinf db does.
420 // So only oaiinfdb has timestamps for deleted docs
421 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
422 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
423 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds, like oailastmodified in the collection index db
424 millis = Long.parseLong(timestamp)*1000; // in milliseconds
425
426 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
427 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
428 OID_is_deleted = true;
429 }
430 }
431 }
432
433 //get a DBInfo object of the identifier; if this identifier is not present in the database,
434 // null is returned.
435 DBInfo info = this.coll_db.getInfo(oid);
436 if (info == null) {
437 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
438 logger.error("OID: " + oid + " is not present in the collection index database.");
439 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
440 } // if doc deleted, id missing in indexdb is not an error: doc id would exist only in oai-inf db, marked as deleted 'D'
441 }
442 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
443 millis = getDateStampMillis(info);
444 }
445 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
446
447
448 Element get_record_response = doc.createElement(GSXML.RESPONSE_ELEM);
449 Element get_record = doc.createElement(OAIXML.GET_RECORD);
450 get_record_response.appendChild(get_record);
451 Element record = doc.createElement(OAIXML.RECORD);
452 //compose the header element
453 record.appendChild(createHeaderElement(doc, oid, oailastmodified, OID_is_deleted));
454 if(!OID_is_deleted) {
455 //compose the metadata element
456 record.appendChild(createMetadataElement(doc, prefix, info));
457 }
458 get_record.appendChild(record);
459 return get_record_response;
460 }
461
462 /** return a list of records in specified set, containing metadata from specified prefix*/
463 protected Element processListRecords(Element req) {
464 return processListIdentifiersOrRecords(req, OAIXML.LIST_RECORDS, true);
465 }
466
467 /** return a list of identifiers in specified set that contain metadata belonging to specified prefix. */
468 protected Element processListIdentifiers(Element req) {
469 return processListIdentifiersOrRecords(req, OAIXML.LIST_IDENTIFIERS, false);
470 }
471
472 // Get a list of records/identifiers that match the parameters.
473 protected Element processListIdentifiersOrRecords(Element req, String response_name, boolean include_metadata) {
474 /** arguments:
475 metadataPrefix: required
476 * from: optional
477 * until: optional
478 * set: optional
479 * resumptionToken: exclusive and optional (ignored as it has been handled by OAIReceptionist)
480 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
481 */
482 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
483
484 if(params.getLength() == 0) {
485 logger.error("must at least have the metadataPrefix parameter, can't be none");
486 return OAIXML.createErrorResponse(OAIXML.BAD_ARGUMENT, "");
487 }
488
489 HashMap<String, String> param_map = GSXML.getParamMap(params);
490
491 String prefix = "";
492 Date from_date = null;
493 Date until_date = null;
494
495 if(param_map.containsKey(OAIXML.METADATA_PREFIX) == false) {
496 //Just a double-check
497 logger.error("A param element containing the metadataPrefix is not present.");
498 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
499 }
500 prefix = param_map.get(OAIXML.METADATA_PREFIX);
501 if (prefix == null || prefix.equals("")) {
502 //Just a double-check
503 logger.error("the value of metadataPrefix att is not present in the request.");
504 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
505 }
506
507 if(param_map.containsKey(OAIXML.FROM)) {
508 String from = param_map.get(OAIXML.FROM);
509 from_date = OAIXML.getDate(from);
510 }
511 if(param_map.containsKey(OAIXML.UNTIL)) {
512 String until = param_map.get(OAIXML.UNTIL);
513 until_date = OAIXML.getDate(until);
514 }
515
516 if (!format_response_map.containsKey(prefix)) {
517 logger.error(prefix + " metadata prefix is not supported for collection "+this.coll_name);
518 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
519 }
520
521 // get list of oids
522 ArrayList<String> oid_list = null;
523 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
524 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
525
526 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
527 logger.warn("@@@@@@@@@@@@@ NO OIDs in oai-inf db for " + this.cluster_name);
528 oid_list = getChildrenIds(OAIXML.BROWSELIST);
529 }
530 }
531
532 if (oid_list == null) {
533 logger.error("No matched records found in collection: oai-inf and index db's browselist are empty");
534 return OAIXML.createErrorResponse(OAIXML.NO_RECORDS_MATCH, "");
535 }
536 // all validation is done
537
538 // get the list of elements that are in this metadata prefix
539 HashSet<String> set_of_elems = format_elements_map.get(prefix);
540
541 Document doc = XMLConverter.newDOM();
542 Element list_items_response = doc.createElement(GSXML.RESPONSE_ELEM);
543 Element list_items = doc.createElement(response_name);
544 list_items_response.appendChild(list_items);
545
546 for(int i=0; i<oid_list.size(); i++) {
547 String oid = oid_list.get(i);
548
549 if(oid.equals(OAIXML.OAI_EARLIEST_TIMESTAMP_OID)) { // internal id not doc id, so skip
550 continue;
551 }
552
553 boolean OID_is_deleted = false;
554 long millis = -1;
555
556 DBInfo oai_info = null;
557 if(oaiinf_db != null) {
558 oai_info = this.oaiinf_db.getInfo(oid);
559 if (oai_info == null) {
560 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
561 } else {
562
563 // indexdb doesn't have info on deleted docs, only oaiinf db does.
564 // So only oaiinfdb has timestamps for deleted docs
565 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
566 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
567 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds like oailastmodified in the collection index db
568 millis = Long.parseLong(timestamp)*1000; // in milliseconds
569
570 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
571 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
572 OID_is_deleted = true;
573 }
574 }
575 }
576 DBInfo info = this.coll_db.getInfo(oid);
577 if (info == null) { // can happen if oid was deleted, in which case only oai_info keeps a record of the oid
578 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
579 logger.error("Collection database does not contain information about oid: " +oid);
580 }
581 }
582 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
583
584 millis = getDateStampMillis(info);
585 }
586
587 Date this_date = null;
588 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
589
590 if (millis == -1) {
591 if (from_date != null || until_date !=null) {
592 continue; // if this doc doesn't have a date for some reason, and
593 // we are doing a date range, then don't include it.
594 }
595 } else {
596 this_date = new Date(millis);
597 if (from_date != null) {
598 if(this_date.before(from_date)) {
599 continue;
600 }
601 }
602 if (until_date != null) {
603 if (this_date.after(until_date)) {
604 continue;
605 }
606 }
607 }
608
609 //compose the header element, which we'll be appending no matter what
610 Element header = createHeaderElement(doc, oid, oailastmodified, OID_is_deleted);
611
612 if (include_metadata) { // doing ListRecords
613 // compose a record for adding header and metadata
614 Element record = doc.createElement(OAIXML.RECORD);
615 list_items.appendChild(record);
616 //insert the header element
617 record.appendChild(header);
618 //Now check that this id has metadata for the required prefix.
619 if (info != null && documentContainsMetadata(info, set_of_elems)) {
620 // YES, it does have some metadata for this prefix
621 //compose the metadata element
622 record.appendChild(createMetadataElement(doc, prefix, info));
623 } // otherwise the oid was 'deleted' and only in the oai-inf db and not in the info (collection index) db
624 } else { // doing ListIdentifiers
625 //append the header element
626 list_items.appendChild(header);
627 }
628
629 }//end of for(int i=0; i<oid_list.size(); i++) of doing thru each record
630
631 return list_items_response;
632
633 }
634
635
636 // have implemented setDescription as an element, instead of a container containing metadata
637 private boolean configureSetInfo() {
638
639 Document doc = XMLConverter.newDOM();
640 this.list_sets_response = doc.createElement(GSXML.RESPONSE_ELEM);
641 Element list_sets_elem = doc.createElement(OAIXML.LIST_SETS);
642 this.list_sets_response.appendChild(list_sets_elem);
643 String set_name = this.coll_name;
644 String set_description = null;
645 Element name_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_NAME);
646 if (name_elem!=null) {
647 set_name = GSXML.getNodeText(name_elem);
648 if (set_name.equals("")) {
649 set_name = this.coll_name; // default to coll name if can't find one
650 }
651 }
652 Element description_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_DESCRIPTION);
653 if (description_elem!=null) {
654 set_description = GSXML.getNodeText(description_elem);
655 if (set_description.equals("")) {
656 set_description = null;
657 }
658 }
659 Element coll_set = OAIXML.createSet(doc, this.coll_name, set_name, set_description);
660 list_sets_elem.appendChild(coll_set);
661
662 // are we part of any super sets?
663 NodeList super_set_list = GSXML.getChildrenByTagName(this.config_info, OAIXML.OAI_SUPER_SET);
664 for (int i=0; i<super_set_list.getLength(); i++) {
665 String super_name = ((Element)super_set_list.item(i)).getAttribute(GSXML.NAME_ATT);
666 if (super_name != null && !super_name.equals("")) {
667 list_sets_elem.appendChild(OAIXML.createSet(doc, super_name, super_name, null));
668 }
669 }
670 return true;
671 }
672
673 /** create the metadata element used when processing ListRecords/GetRecord requests
674 */
675 protected Element createMetadataElement(Document doc, String prefix, DBInfo info) {
676 // the <metadata> element
677 Element metadata = doc.createElement(OAIXML.METADATA);
678 // the <oai:dc namespace...> element
679 Element prfx_str_elem = (Element)doc.importNode(this.format_meta_elem_map.get(prefix), true);
680 metadata.appendChild(prfx_str_elem);
681
682 Element meta_format_element = GSXML.getNamedElement(this.meta_formats_definition, OAIXML.METADATA_FORMAT, GSXML.NAME_ATT, prefix);
683 NodeList elements = meta_format_element.getElementsByTagName(OAIXML.ELEMENT);
684 // for each element in the definition
685 for (int i=0; i<elements.getLength(); i++) {
686 Element e = (Element)elements.item(i);
687 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
688 if (map == null) {
689 // look up the element name
690 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), info);
691 } else {
692 // we go though the list of names in the mapping
693 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), map.getAttribute(OAIXML.SELECT), map.getAttribute(OAIXML.ELEMENTS), info);
694 }
695 }
696 // output any metadata that is not just a simple mapping
697 addCustomMetadata(prfx_str_elem, prefix, info);
698 return metadata;
699 }
700
701 /** a simple addMetadata where we look for meta_name metadata, and add as that name*/
702 protected void addMetadata(Element meta_list_elem, String meta_name, DBInfo info) {
703 Vector<String> values = info.getMultiInfo(meta_name);
704 if (values != null && values.size()!=0) {
705 for (int i=0; i<values.size(); i++) {
706 addMetadataElement(meta_list_elem, meta_name, values.get(i));
707 }
708 }
709 }
710
711 /** more complicated addMetadata - can add multiple items. */
712 protected void addMetadata(Element meta_list_elem, String new_meta_name, String select_type, String name_list, DBInfo info) {
713 String[] names = name_list.split(",");
714 for (int i=0; i<names.length; i++) {
715 Vector<String> values;
716 // some special words
717 if (names[i].startsWith(OAIXML.GSF_LINK_PREFIX)) {
718 values = new Vector<String>();
719 String base_url = OAIXML.getBaseURL(); // e.g. e.g. http://host:port/greenstone3/oaiserver
720 base_url = base_url.substring(0, base_url.lastIndexOf("/")+1);
721 String link_url = null;
722 if (names[i].equals(OAIXML.GSF_LINK_PREFIX+OAIXML.LINK_TYPE_DOCUMENT)) {
723 link_url = base_url + "library/collection/" + this.coll_name + "/document/" + info.getInfo("Identifier");
724 } else if (names[i].equals(OAIXML.GSF_LINK_PREFIX+OAIXML.LINK_TYPE_PRINT)) {
725 link_url = base_url + "library/collection/" + this.coll_name + "/document/" + info.getInfo("Identifier")+"/print";
726 } else if (names[i].equals(OAIXML.GSF_LINK_PREFIX+OAIXML.LINK_TYPE_SOURCE)) {
727 String srcfile = info.getInfo("srclinkFile");
728 if (!srcfile.equals("")) {
729 link_url = base_url + "library/sites/"
730 + this.site_name
731 + "/collect/" + this.coll_name + "/index/assoc/"
732 + info.getInfo("assocfilepath") + "/" + srcfile;
733 }
734 }
735 if (link_url !=null) {
736 values.add(link_url);
737 }
738 } else {
739 values = info.getMultiInfo(names[i]);
740 }
741 if (values == null || values.size()==0) {
742 continue;
743 }
744 for (int j=0; j<values.size(); j++) {
745 addMetadataElement(meta_list_elem, new_meta_name, values.get(j));
746 if (select_type.equals(OAIXML.SELECT_SINGLE_VALUE)) {
747 return; // only want to add one value
748 }
749 }
750 if (select_type.equals(OAIXML.SELECT_FIRST_VALID_META)) {
751 return; // we have added all values of this meta elem
752 }
753 // otherwise, we will keep going through the list and add them all.
754 }
755 }
756
757 // specific metadata formats might need to do some custom metadata that is not
758 //just a standard mapping.
759 protected void addCustomMetadata(Element meta_list_elem, String prefix, DBInfo info) {
760
761
762 }
763
764 /** create the actual metadata element for the list */
765 protected void addMetadataElement(Element meta_list_elem, String name, String value) {
766
767 Element meta = GSXML.createTextElement(meta_list_elem.getOwnerDocument(), name, value);
768 meta_list_elem.appendChild(meta);
769 }
770
771
772 /** create a header element used when processing requests like ListRecords/GetRecord/ListIdentifiers
773 */
774 protected Element createHeaderElement(Document doc, String oid, String oailastmodified, boolean deleted) {
775
776 Element header = doc.createElement(OAIXML.HEADER);
777
778 // if deleted, get the date and change oailastmodified to timestamp in oaiinfo
779 if(deleted) {
780 header.setAttribute(OAIXML.OAI_INF_STATUS, OAIXML.HEADER_STATUS_ATTR_DELETED); // set the header status to deleted
781 // then the timestamp for deletion will be from oai-inf database
782 }
783
784 Element identifier = doc.createElement(OAIXML.IDENTIFIER);
785 GSXML.setNodeText(identifier, OAIXML.createOAIIdentifier(repository_id,coll_name,oid));
786 header.appendChild(identifier);
787 Element set_spec = doc.createElement(OAIXML.SET_SPEC);
788 GSXML.setNodeText(set_spec, coll_name);
789 header.appendChild(set_spec);
790 Element datestamp = doc.createElement(OAIXML.DATESTAMP);
791 GSXML.setNodeText(datestamp, oailastmodified);
792 header.appendChild(datestamp);
793 return header;
794 }
795
796 /** return the metadata information */
797 protected Element processListMetadataFormats(Element req) {
798 // the request sent here must contain an OID. see doListMetadataFormats() in OAIReceptionist
799 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.OID);
800 if (param == null) {
801 logger.error("An element containing the OID attribute not is present.");
802 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
803 }
804 String oid = param.getAttribute(GSXML.VALUE_ATT);
805 if (oid == null || oid.equals("")) {
806 logger.error("No OID is present in the request.");
807 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
808 }
809
810 /*
811 ArrayList<String> oid_list = null;
812 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
813 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
814
815 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
816 oid_list = getChildrenIds(OAIXML.BROWSELIST);
817 }
818 }
819 */
820 // assume meta formats are only for OIDs that have not been deleted
821 // so don't need to check oai-inf db, and can just check collection's index db for list of OIDs
822 ArrayList<String> oid_list = getChildrenIds(OAIXML.BROWSELIST);
823 if (oid_list == null || oid_list.contains(oid) == false) {
824 logger.error("OID: " + oid + " is not present in the database.");
825 Element e= OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
826// logger.error((new XMLConverter()).getPrettyString (e));
827 return e;
828 }
829
830 DBInfo info = null;
831 info = this.coll_db.getInfo(oid);
832 if (info == null) { //just double check
833 return OAIXML.createErrorResponse(OAIXML.OAI_SERVICE_UNAVAILABLE, "");
834 }
835
836 Document doc = XMLConverter.newDOM();
837 Element list_metadata_formats_response = doc.createElement(GSXML.RESPONSE_ELEM);
838
839 Element list_metadata_formats = doc.createElement(OAIXML.LIST_METADATA_FORMATS);
840 list_metadata_formats_response.appendChild(list_metadata_formats);
841 boolean has_meta_format = false;
842
843 // for each format in format_elements_map
844 Iterator<String> it = format_elements_map.keySet().iterator();
845 while (it.hasNext()) {
846 String format = it.next();
847 HashSet<String> set_of_elems = format_elements_map.get(format);
848 if (documentContainsMetadata(info, set_of_elems)) {
849 // add this format into the response
850 has_meta_format = true;
851 list_metadata_formats.appendChild(doc.importNode(format_response_map.get(format), true));
852 }
853 }
854
855 if (has_meta_format == false) {
856 logger.error("Specified metadata names are not contained in the database.");
857 return OAIXML.createErrorResponse(OAIXML.NO_METADATA_FORMATS, "");
858 } else {
859 return list_metadata_formats_response;
860 }
861 }
862
863 protected boolean documentContainsMetadata(DBInfo info, HashSet<String> set_of_elems) {
864 if (set_of_elems.size() == 0) {
865 return false;
866 }
867 Iterator<String> i = set_of_elems.iterator();
868 while (i.hasNext()) {
869 if (!info.getInfo(i.next()).equals("")) {
870 return true;
871 }
872 }
873 return false;
874 }
875
876 /** returns a list of the child ids in order, null if no children */
877 protected ArrayList<String> getChildrenIds(String node_id) {
878 DBInfo info = this.coll_db.getInfo(node_id);
879 if (info == null) {
880 return null;
881 }
882
883 String contains = info.getInfo("contains");
884 if (contains.equals("")) {
885 return null;
886 }
887 ArrayList<String> children = new ArrayList<String>();
888 StringTokenizer st = new StringTokenizer(contains, ";");
889 while (st.hasMoreTokens()) {
890 String child_id = st.nextToken().replaceAll("\"", node_id);
891 children.add(child_id);
892 }
893 return children;
894 }
895 /**method to check whether any of the 'metadata_names' is contained in the 'info'.
896 * The name may be in the form: <name>,<mapped name>, in which the mapped name is
897 * optional. The mapped name is looked up in the DBInfo; if not present, use the first
898 * name which is mandatory.
899 */
900 protected boolean containsMetadata(DBInfo info, String[] metadata_names) {
901 if (metadata_names == null) return false;
902 logger.info("checking metadata names in db.");
903 for(int i=0; i<metadata_names.length; i++) {
904 int index = metadata_names[i].indexOf(",");
905 String meta_name = (index == -1) ? metadata_names[i] :
906 metadata_names[i].substring(index + 1);
907
908 if(info.getInfo(meta_name).equals("") == false) {
909 return true;
910 }
911 }
912 return false;
913 }
914
915 protected long getDateStampMillis(DBInfo info) {
916 // gs.OAIDateStamp is in YYYY-MM-DD
917 String time_stamp = info.getInfo(OAIXML.GS_OAI_DATE_STAMP);
918 long millis = -1;
919 if (!time_stamp.equals("")) {
920 millis = OAIXML.getTime(time_stamp);
921 }
922 if (millis == -1) {
923 // oailastmodified is in seconds
924 time_stamp = info.getInfo(OAIXML.OAI_LASTMODIFIED);
925 if (!time_stamp.equals("")) {
926 millis = Long.parseLong(time_stamp)*1000;
927 }
928 }
929 return millis;
930
931
932 }
933}
934
935
Note: See TracBrowser for help on using the repository browser.