source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/OAIPMH.java@ 32828

Last change on this file since 32828 was 32828, checked in by ak19, 5 years ago

The major change is that opening the coll db and oai-inf db is moved from OAIPMH.configure() into OAIPMH.configureOAI() since we don't want to end up with 2 instances of DB handles: once when the MessageRouter of the library servlet calls configure() on a collection's services including OAIPMH and once when the OAIMessageRouter of the oaiserver servlet calls configure() (before calling configureOAI()) on its OAICollections' OAIPMH services. Instead, the dbs are only opened and the handles stored once, when configureOAI() is called on OAIPMH by OAIMessageRouter when the OAIServer servlet is first visited. Minor accompanying changes are that index_stem and infodb_type need to be member vars instead of local to configure() now.

File size: 37.0 KB
Line 
1/*
2 * OAIPMH.java
3 * Copyright (C) 2010 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.OAIXML;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.GSFile;
27import org.greenstone.gsdl3.util.XMLConverter;
28
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43import java.util.Date;
44import java.util.HashMap;
45import java.util.HashSet;
46import java.util.Map.Entry;
47
48import org.apache.log4j.Logger;
49
/** Implements the OAI metadata retrieval service for GS3 collections.
 * Looks into each collection's databases and retrieves the metadata.
 *
 */
54
55public class OAIPMH extends ServiceRack {
56
57 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.OAIPMH.class.getName());
58
59 protected SimpleCollectionDatabase coll_db = null;
60 protected SimpleCollectionDatabase oaiinf_db = null;
61
62 protected String site_name = "";
63 protected String coll_name = "";
64
65 // set this up during configure
66 protected Element list_sets_response = null;
67
68 protected Element meta_formats_definition = null;
69 protected HashMap<String, HashSet<String>> format_elements_map = null;
70 protected HashMap<String, Element> format_response_map = null;
71 protected HashMap<String, Element> format_meta_elem_map = null;
72
73 protected String index_stem = "";
74 protected String infodb_type = "";
75
76 /** constructor */
77 public OAIPMH() {
78
79 }
80
81 public void cleanUp() {
82 super.cleanUp();//??
83 this.coll_db.closeDatabase();
84 if (this.oaiinf_db != null){
85 this.oaiinf_db.closeDatabase();
86 }
87 }
88
89 /** configure this service
90 info is the OAIPMH service rack from collectionConfig.xml, and
91 extra_info is buildConfig.xml */
92 public boolean configure(Element info, Element extra_info) {
93 if (!super.configure(info, extra_info)){
94 logger.info("Configuring ServiceRack.java returns false.");
95 return false;
96 }
97
98 //get the names from ServiceRack.java
99 this.site_name = this.router.getSiteName();
100 this.coll_name = this.cluster_name;
101
102 logger.info("Configuring OAIPMH...");
103
104 this.config_info = info;
105
106 // the index stem is either specified in the buildConfig.xml file (extra_info) or uses the collection name
107 Element metadata_list = (Element) GSXML.getChildByTagName(extra_info, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
108
109 if (metadata_list != null) {
110
111 Element index_stem_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "indexStem");
112
113 if (index_stem_elem != null) {
114 this.index_stem = GSXML.getNodeText(index_stem_elem);
115 }
116
117 Element infodb_type_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "infodbType");
118 if (infodb_type_elem != null) {
119 this.infodb_type = GSXML.getNodeText(infodb_type_elem);
120 }
121
122 }
123
124 if (index_stem == null || index_stem.equals("")) {
125 this.index_stem = this.cluster_name; // index_stem is the name of the db in indext/text, it is <colname>.<db>
126 }
127 if (infodb_type == null || infodb_type.equals("")) {
128 this.infodb_type = "gdbm"; // the default
129 }
130
131 // DB OPENING STUFF MOVED TO configureOAI(), because OAIPMH.configure() is called by the regular MessageRouter when this activates collections for the regular "library" servlet
132 // whereas OAIPMH.configureOAI() is only called by OAIMessageRouter when it activates collections for the "oaiserver" servlet (after OAIMessageRouter calls regular configure() first)
133 // We don't want the DBs opened twice: once by MessageRouter's call to OAIPMH.configure() and once by OAIMessageRouter calling OAIPMH.configure().
134
135 // work out what sets this collection has. Will usually contain the collection itself, optional super collection, and maybe subcolls if appropriate classifiers are present.
136 configureSetInfo();
137 // the short_service_info is used by the message router to find the method names,
138
139 Element list_records = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
140 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
141 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
142 this.short_service_info.appendChild(list_records);
143
144 Element list_identifiers = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
145 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
146 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
147 this.short_service_info.appendChild(list_identifiers);
148
149 Element list_sets = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
150 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
151 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
152 this.short_service_info.appendChild(list_sets);
153
154 Element list_metadata_formats = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
155 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
156 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
157 this.short_service_info.appendChild(list_metadata_formats);
158
159 Element get_record = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
160 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
161 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
162 this.short_service_info.appendChild(get_record);
163
164 return true;
165 }
166
167 public boolean configureOAI(Element oai_config_elem) {
168 this.meta_formats_definition = this.desc_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
169 this.format_response_map = new HashMap<String, Element>();
170 this.format_elements_map = new HashMap<String, HashSet<String>>();
171 this.format_meta_elem_map = new HashMap<String, Element>();
172
173 // for now, all we want is the metadata prefix description and the mapping list
174 Element main_lmf_elem = (Element) GSXML.getChildByTagName(oai_config_elem, OAIXML.LIST_METADATA_FORMATS);
175 if (main_lmf_elem == null) {
176 logger.error("No listMetadataFormats element found in OAIConfig.xml");
177 return false;
178 }
179 NodeList meta_formats_list = this.config_info.getElementsByTagName(OAIXML.METADATA_FORMAT);
180 if (meta_formats_list.getLength() == 0) {
181 logger.error("no metadataFormat elements found in OAIPMH serviceRack element");
182 return false;
183 }
184
185 boolean found_meta_format = false;
186 for(int i=0; i<meta_formats_list.getLength(); i++) {
187 Element mf = (Element) meta_formats_list.item(i);
188 String prefix = mf.getAttribute(OAIXML.METADATA_PREFIX);
189 if (prefix.equals("")) {
190 logger.error("metadataFormat element had no metadataPrefix attribute");
191 continue;
192 }
193 // get the right format from OAICOnfig
194 Element meta_format = findNamedMetadataFormat(main_lmf_elem, prefix);
195 if (meta_format == null) {
196 logger.error("Couldn't find metadataFormat named "+prefix+" in OAIConfig.xml");
197 continue;
198 }
199
200 // copy the format definition into our stored Element
201 Element collection_version_format = (Element) this.desc_doc.importNode(meta_format, true);
202 collection_version_format.setAttribute(GSXML.NAME_ATT, prefix); // for convenience
203 this.meta_formats_definition.appendChild(collection_version_format);
204 // set up the response element for this format
205 format_response_map.put(prefix, OAIXML.getMetadataFormatShort(this.desc_doc, collection_version_format));
206 // add in collection specific mappings
207 addCollectionMappings(collection_version_format, mf);
208 // now set up a list of all collection elements for reverse lookup of the mapping
209 format_elements_map.put(prefix, getAllCollectionElements(collection_version_format));
210 format_meta_elem_map.put(prefix, OAIXML.getMetadataPrefixElement(this.desc_doc, prefix, collection_version_format));
211
212 } // end for
213
214 // Open the coll db and oai-inf db databases and store handles to them
215 coll_db = new SimpleCollectionDatabase(infodb_type);
216 if (!coll_db.databaseOK()) {
217 logger.error("Couldn't create the collection database of type "+infodb_type);
218 return false;
219 }
220
221 oaiinf_db = new SimpleCollectionDatabase(infodb_type);
222 if (!oaiinf_db.databaseOK()) {
223 logger.error("Couldn't create the oai-inf database of type "+infodb_type);
224 return false;
225 }
226
227
228 // Open databases for querying
229 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, index_stem, infodb_type);
230 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
231 logger.error("Could not open collection database!");
232 return false;
233 }
234 // the oaiinf_db is called oai-inf.<infodb_type_extension>
235 String oaiinf_db_file = GSFile.OAIInfoDatabaseFile(this.site_home, this.cluster_name, "oai-inf", infodb_type);
236 File oaiinfFile = new File(oaiinf_db_file);
237
238 if(!oaiinfFile.exists()) {
239 logger.warn("oai-inf database for collection + " + this.cluster_name + " does not exist.");
240 oaiinf_db = null;
241 } else if (!this.oaiinf_db.openDatabase(oaiinf_db_file, SimpleCollectionDatabase.READ)) {
242 logger.warn("Could not open oai-inf database for collection + " + this.cluster_name + "!");
243 oaiinf_db = null;
244 }
245
246 return true;
247 }
248
249 /**
250 * @return the associated OAICollection's OAI_EARLIEST_TIMESTAMP_OID record's
251 * OAI_INF_TIMESTAMP field from the collection's oai-inf.db IN MILLISECONDS
252 */
253 public long getEarliestTimestamp() {
254 long timestamp = -1;
255
256 DBInfo oai_info = null;
257 if(oaiinf_db != null) {
258 // get internal record containing the earliest timestamp of the collection
259 oai_info = this.oaiinf_db.getInfo(OAIXML.OAI_EARLIEST_TIMESTAMP_OID);
260 if (oai_info == null) {
261 logger.warn("Can't get collection " + this.cluster_name + "'s earliest timestamp from oai-inf db. No entry for 'OID' " + OAIXML.OAI_EARLIEST_TIMESTAMP_OID + " in the db.");
262 } else {
263 timestamp = Long.parseLong(oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP)) * 1000; // stored in seconds, so x1000 to convert to milliseconds
264 //logger.info("@@@ found earliest OAI timestamp for collection " + this.coll_name + ": " + timestamp + " (ms)");
265 }
266 }
267 return timestamp;
268 }
269
270 protected Element findNamedMetadataFormat(Element list_meta_formats, String prefix) {
271 NodeList formats = list_meta_formats.getElementsByTagName(OAIXML.METADATA_FORMAT);
272 for (int i=0; i<formats.getLength(); i++) {
273 Element format = (Element)formats.item(i);
274 String meta_name = GSXML.getNodeText((Element)GSXML.getChildByTagName(format, OAIXML.METADATA_PREFIX));
275 if (prefix.equals(meta_name)) {
276 return format;
277 }
278 }
279 return null;
280 }
281
282 /** goes through the mappings from the collection one, and replaces existing ones in the main one */
283 protected void addCollectionMappings(Element main_meta_format, Element coll_meta_format) {
284
285 Element element_list = (Element)GSXML.getChildByTagName(main_meta_format, OAIXML.ELEMENT+GSXML.LIST_MODIFIER);
286 Document doc = element_list.getOwnerDocument();
287 NodeList coll_elements = coll_meta_format.getElementsByTagName(OAIXML.ELEMENT);
288 if (coll_elements.getLength()==0) {
289 // no mappings to include
290 return;
291 }
292 for (int i=0; i<coll_elements.getLength(); i++) {
293 Element e = (Element)coll_elements.item(i);
294 String elem_name = e.getAttribute(GSXML.NAME_ATT);
295 Element main_elem = GSXML.getNamedElement(element_list, OAIXML.ELEMENT, GSXML.NAME_ATT, elem_name);
296 if (main_elem == null) {
297 logger.error(elem_name+" not found in meta format, not using it");
298 } else {
299 element_list.replaceChild(doc.importNode(e, true),main_elem );
300 }
301 }
302 }
303
304 /** goes through all the mappings and makes a set of all collection
305 metadata names that could become an oai meta element - acts as
306 a reverse lookup for the mappings */
307 protected HashSet<String> getAllCollectionElements(Element meta_format) {
308 HashSet<String> meta_name_set = new HashSet<String>();
309 NodeList elements = meta_format.getElementsByTagName(OAIXML.ELEMENT);
310 for (int i=0; i<elements.getLength(); i++) {
311 Element e = (Element)elements.item(i);
312 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
313 if (map == null) {
314 // there is no mapping, just use the element name
315 meta_name_set.add(e.getAttribute(GSXML.NAME_ATT));
316 } else {
317 String list_of_names = map.getAttribute(OAIXML.ELEMENTS);
318 String[] name_array = list_of_names.split(",");
319 for (int j=0; j<name_array.length; j++) {
320 meta_name_set.add(name_array[j]);
321 }
322 }
323 }
324 return meta_name_set;
325 }
326
327 /** returns a specific service description */
328 public Element getServiceDescription(Document doc, String service_id, String lang, String subset) {
329
330 if (service_id.equals(OAIXML.LIST_RECORDS)) {
331 Element list_records = doc.createElement(GSXML.SERVICE_ELEM);
332 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
333 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
334 return list_records;
335 }
336
337 if (service_id.equals(OAIXML.LIST_IDENTIFIERS)) {
338 Element list_identifiers = doc.createElement(GSXML.SERVICE_ELEM);
339 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
340 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
341 return list_identifiers;
342 }
343 if (service_id.equals(OAIXML.LIST_SETS)) {
344 Element list_sets = doc.createElement(GSXML.SERVICE_ELEM);
345 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
346 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
347 return list_sets;
348 }
349 if (service_id.equals(OAIXML.LIST_METADATA_FORMATS)) {
350 Element list_metadata_formats = doc.createElement(GSXML.SERVICE_ELEM);
351 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
352 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
353 return list_metadata_formats;
354 }
355
356 if (service_id.equals(OAIXML.GET_RECORD)) {
357 Element get_record = doc.createElement(GSXML.SERVICE_ELEM);
358 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
359 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
360 return get_record;
361 }
362
363 return null;
364 }
365
366 /** The list sets service returns all the sets that this collection is/is part of/contains. This is gathered by Receptionist from all collections to answer the OAI ListSets request. */
367 protected Element processListSets(Element req) {
368 return list_sets_response;
369 }
370 /** returns the actual record element used in the OAI GetRecord response */
371 protected Element processGetRecord(Element req) {
372 /** arguments:
373 identifier: required
374 metadataPrefix: required
375 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
376 */
377 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
378 HashMap<String, String> param_map = GSXML.getParamMap(params);
379
380 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
381 if (prefix == null || prefix.equals("")) {
382 //Just a double-check
383 logger.error("the value of metadataPrefix att is not present in the request.");
384 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
385 }
386
387 // check that we support this format
388 if (!format_response_map.containsKey(prefix)) {
389 logger.error("metadata prefix is not supported for collection "+this.coll_name);
390 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
391 }
392
393 Document doc = XMLConverter.newDOM();
394
395 String oid = param_map.get(OAIXML.OID); // TODO should this be identifier???
396 boolean OID_is_deleted = false;
397 long millis = -1;
398
399 DBInfo oai_info = null;
400 if(oaiinf_db != null) {
401 oai_info = this.oaiinf_db.getInfo(oid);
402 if (oai_info == null) {
403 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
404 } else {
405
406 // indexdb doesn't have info on deleted docs, only oaiinf db does.
407 // So only oaiinfdb has timestamps for deleted docs
408 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
409 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
410 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds, like oailastmodified in the collection index db
411 millis = Long.parseLong(timestamp)*1000; // in milliseconds
412
413 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
414 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
415 OID_is_deleted = true;
416 }
417 }
418 }
419
420 //get a DBInfo object of the identifier; if this identifier is not present in the database,
421 // null is returned.
422 DBInfo info = this.coll_db.getInfo(oid);
423 if (info == null) {
424 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
425 logger.error("OID: " + oid + " is not present in the collection index database.");
426 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
427 } // if doc deleted, id missing in indexdb is not an error: doc id would exist only in oai-inf db, marked as deleted 'D'
428 }
429 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
430 millis = getDateStampMillis(info);
431 }
432 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
433
434
435 Element get_record_response = doc.createElement(GSXML.RESPONSE_ELEM);
436 Element get_record = doc.createElement(OAIXML.GET_RECORD);
437 get_record_response.appendChild(get_record);
438 Element record = doc.createElement(OAIXML.RECORD);
439 //compose the header element
440 record.appendChild(createHeaderElement(doc, oid, oailastmodified, OID_is_deleted));
441 if(!OID_is_deleted) {
442 //compose the metadata element
443 record.appendChild(createMetadataElement(doc, prefix, info));
444 }
445 get_record.appendChild(record);
446 return get_record_response;
447 }
448
449 /** return a list of records in specified set, containing metadata from specified prefix*/
450 protected Element processListRecords(Element req) {
451 return processListIdentifiersOrRecords(req, OAIXML.LIST_RECORDS, true);
452 }
453
454 /** return a list of identifiers in specified set that contain metadata belonging to specified prefix. */
455 protected Element processListIdentifiers(Element req) {
456 return processListIdentifiersOrRecords(req, OAIXML.LIST_IDENTIFIERS, false);
457 }
458
459 // Get a list of records/identifiers that match the parameters.
460 protected Element processListIdentifiersOrRecords(Element req, String response_name, boolean include_metadata) {
461 /** arguments:
462 metadataPrefix: required
463 * from: optional
464 * until: optional
465 * set: optional
466 * resumptionToken: exclusive and optional (ignored as it has been handled by OAIReceptionist)
467 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
468 */
469 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
470
471 if(params.getLength() == 0) {
472 logger.error("must at least have the metadataPrefix parameter, can't be none");
473 return OAIXML.createErrorResponse(OAIXML.BAD_ARGUMENT, "");
474 }
475
476 HashMap<String, String> param_map = GSXML.getParamMap(params);
477
478 String prefix = "";
479 Date from_date = null;
480 Date until_date = null;
481
482 if(param_map.containsKey(OAIXML.METADATA_PREFIX) == false) {
483 //Just a double-check
484 logger.error("A param element containing the metadataPrefix is not present.");
485 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
486 }
487 prefix = param_map.get(OAIXML.METADATA_PREFIX);
488 if (prefix == null || prefix.equals("")) {
489 //Just a double-check
490 logger.error("the value of metadataPrefix att is not present in the request.");
491 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
492 }
493
494 if(param_map.containsKey(OAIXML.FROM)) {
495 String from = param_map.get(OAIXML.FROM);
496 from_date = OAIXML.getDate(from);
497 }
498 if(param_map.containsKey(OAIXML.UNTIL)) {
499 String until = param_map.get(OAIXML.UNTIL);
500 until_date = OAIXML.getDate(until);
501 }
502
503 if (!format_response_map.containsKey(prefix)) {
504 logger.error(prefix + " metadata prefix is not supported for collection "+this.coll_name);
505 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
506 }
507
508 // get list of oids
509 ArrayList<String> oid_list = null;
510 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
511 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
512
513 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
514 logger.warn("@@@@@@@@@@@@@ NO OIDs in oai-inf db for " + this.cluster_name);
515 oid_list = getChildrenIds(OAIXML.BROWSELIST);
516 }
517 }
518
519 if (oid_list == null) {
520 logger.error("No matched records found in collection: oai-inf and index db's browselist are empty");
521 return OAIXML.createErrorResponse(OAIXML.NO_RECORDS_MATCH, "");
522 }
523 // all validation is done
524
525 // get the list of elements that are in this metadata prefix
526 HashSet<String> set_of_elems = format_elements_map.get(prefix);
527
528 Document doc = XMLConverter.newDOM();
529 Element list_items_response = doc.createElement(GSXML.RESPONSE_ELEM);
530 Element list_items = doc.createElement(response_name);
531 list_items_response.appendChild(list_items);
532
533 for(int i=0; i<oid_list.size(); i++) {
534 String oid = oid_list.get(i);
535
536 if(oid.equals(OAIXML.OAI_EARLIEST_TIMESTAMP_OID)) { // internal id not doc id, so skip
537 continue;
538 }
539
540 boolean OID_is_deleted = false;
541 long millis = -1;
542
543 DBInfo oai_info = null;
544 if(oaiinf_db != null) {
545 oai_info = this.oaiinf_db.getInfo(oid);
546 if (oai_info == null) {
547 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
548 } else {
549
550 // indexdb doesn't have info on deleted docs, only oaiinf db does.
551 // So only oaiinfdb has timestamps for deleted docs
552 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
553 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
554 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds like oailastmodified in the collection index db
555 millis = Long.parseLong(timestamp)*1000; // in milliseconds
556
557 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
558 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
559 OID_is_deleted = true;
560 }
561 }
562 }
563 DBInfo info = this.coll_db.getInfo(oid);
564 if (info == null) { // can happen if oid was deleted, in which case only oai_info keeps a record of the oid
565 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
566 logger.error("Collection database does not contain information about oid: " +oid);
567 }
568 }
569 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
570
571 millis = getDateStampMillis(info);
572 }
573
574 Date this_date = null;
575 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
576
577 if (millis == -1) {
578 if (from_date != null || until_date !=null) {
579 continue; // if this doc doesn't have a date for some reason, and
580 // we are doing a date range, then don't include it.
581 }
582 } else {
583 this_date = new Date(millis);
584 if (from_date != null) {
585 if(this_date.before(from_date)) {
586 continue;
587 }
588 }
589 if (until_date != null) {
590 if (this_date.after(until_date)) {
591 continue;
592 }
593 }
594 }
595
596 //compose the header element, which we'll be appending no matter what
597 Element header = createHeaderElement(doc, oid, oailastmodified, OID_is_deleted);
598
599 if (include_metadata) { // doing ListRecords
600 // compose a record for adding header and metadata
601 Element record = doc.createElement(OAIXML.RECORD);
602 list_items.appendChild(record);
603 //insert the header element
604 record.appendChild(header);
605 //Now check that this id has metadata for the required prefix.
606 if (info != null && documentContainsMetadata(info, set_of_elems)) {
607 // YES, it does have some metadata for this prefix
608 //compose the metadata element
609 record.appendChild(createMetadataElement(doc, prefix, info));
610 } // otherwise the oid was 'deleted' and only in the oai-inf db and not in the info (collection index) db
611 } else { // doing ListIdentifiers
612 //append the header element
613 list_items.appendChild(header);
614 }
615
616 }//end of for(int i=0; i<oid_list.size(); i++) of doing thru each record
617
618 return list_items_response;
619
620 }
621
622
623 // have implemented setDescription as an element, instead of a container containing metadata
624 private boolean configureSetInfo() {
625
626 Document doc = XMLConverter.newDOM();
627 this.list_sets_response = doc.createElement(GSXML.RESPONSE_ELEM);
628 Element list_sets_elem = doc.createElement(OAIXML.LIST_SETS);
629 this.list_sets_response.appendChild(list_sets_elem);
630 String set_name = this.coll_name;
631 String set_description = null;
632 Element name_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_NAME);
633 if (name_elem!=null) {
634 set_name = GSXML.getNodeText(name_elem);
635 if (set_name.equals("")) {
636 set_name = this.coll_name; // default to coll name if can't find one
637 }
638 }
639 Element description_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_DESCRIPTION);
640 if (description_elem!=null) {
641 set_description = GSXML.getNodeText(description_elem);
642 if (set_description.equals("")) {
643 set_description = null;
644 }
645 }
646 Element coll_set = OAIXML.createSet(doc, this.coll_name, set_name, set_description);
647 list_sets_elem.appendChild(coll_set);
648
649 // are we part of any super sets?
650 NodeList super_set_list = GSXML.getChildrenByTagName(this.config_info, OAIXML.OAI_SUPER_SET);
651 for (int i=0; i<super_set_list.getLength(); i++) {
652 String super_name = ((Element)super_set_list.item(i)).getAttribute(GSXML.NAME_ATT);
653 if (super_name != null && !super_name.equals("")) {
654 list_sets_elem.appendChild(OAIXML.createSet(doc, super_name, super_name, null));
655 }
656 }
657 return true;
658 }
659
660 /** create the metadata element used when processing ListRecords/GetRecord requests
661 */
662 protected Element createMetadataElement(Document doc, String prefix, DBInfo info) {
663 // the <metadata> element
664 Element metadata = doc.createElement(OAIXML.METADATA);
665 // the <oai:dc namespace...> element
666 Element prfx_str_elem = (Element)doc.importNode(this.format_meta_elem_map.get(prefix), true);
667 metadata.appendChild(prfx_str_elem);
668
669 Element meta_format_element = GSXML.getNamedElement(this.meta_formats_definition, OAIXML.METADATA_FORMAT, GSXML.NAME_ATT, prefix);
670 NodeList elements = meta_format_element.getElementsByTagName(OAIXML.ELEMENT);
671 // for each element in the definition
672 for (int i=0; i<elements.getLength(); i++) {
673 Element e = (Element)elements.item(i);
674 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
675 if (map == null) {
676 // look up the element name
677 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), info);
678 } else {
679 // we go though the list of names in the mapping
680 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), map.getAttribute(OAIXML.SELECT), map.getAttribute(OAIXML.ELEMENTS), info);
681 }
682 }
683 // output any metadata that is not just a simple mapping
684 addCustomMetadata(prfx_str_elem, prefix, info);
685 return metadata;
686 }
687
688 /** a simple addMetadata where we look for meta_name metadata, and add as that name*/
689 protected void addMetadata(Element meta_list_elem, String meta_name, DBInfo info) {
690 Vector<String> values = info.getMultiInfo(meta_name);
691 if (values != null && values.size()!=0) {
692 for (int i=0; i<values.size(); i++) {
693 addMetadataElement(meta_list_elem, meta_name, values.get(i));
694 }
695 }
696 }
697
698 /** more complicated addMetadata - can add multiple items. */
699 protected void addMetadata(Element meta_list_elem, String new_meta_name, String select_type, String name_list, DBInfo info) {
700 String[] names = name_list.split(",");
701 for (int i=0; i<names.length; i++) {
702 Vector<String> values;
703 // some special words
704 if (names[i].startsWith(OAIXML.GSF_LINK_PREFIX)) {
705 values = new Vector<String>();
706 String base_url = OAIXML.getBaseURL(); // e.g. e.g. http://host:port/greenstone3/oaiserver
707 String link_url = null;
708 if (names[i].equals(OAIXML.GSF_LINK_PREFIX+OAIXML.LINK_TYPE_DOCUMENT)) {
709 link_url = base_url.replace("oaiserver", "library") + "/collection/" + this.coll_name + "/document/" + info.getInfo("Identifier");
710 } else if (names[i].equals(OAIXML.GSF_LINK_PREFIX+OAIXML.LINK_TYPE_SOURCE)) {
711 String srcfile = info.getInfo("srclinkFile");
712 if (!srcfile.equals("")) {
713 link_url = base_url.replace("oaiserver", "") + "sites/"
714 + this.site_name
715 + "/collect/" + this.coll_name + "/index/assoc/"
716 + info.getInfo("assocfilepath") + "/" + srcfile;
717 }
718 }
719 if (link_url !=null) {
720 values.add(link_url);
721 }
722 } else {
723 values = info.getMultiInfo(names[i]);
724 }
725 if (values == null || values.size()==0) {
726 continue;
727 }
728 for (int j=0; j<values.size(); j++) {
729 addMetadataElement(meta_list_elem, new_meta_name, values.get(j));
730 if (select_type.equals(OAIXML.SELECT_SINGLE_VALUE)) {
731 return; // only want to add one value
732 }
733 }
734 if (select_type.equals(OAIXML.SELECT_FIRST_VALID_META)) {
735 return; // we have added all values of this meta elem
736 }
737 // otherwise, we will keep going through the list and add them all.
738 }
739 }
740
741 // specific metadata formats might need to do some custom metadata that is not
742 //just a standard mapping.
743 protected void addCustomMetadata(Element meta_list_elem, String prefix, DBInfo info) {
744
745
746 }
747
748 /** create the actual metadata element for the list */
749 protected void addMetadataElement(Element meta_list_elem, String name, String value) {
750
751 Element meta = GSXML.createTextElement(meta_list_elem.getOwnerDocument(), name, value);
752 meta_list_elem.appendChild(meta);
753 }
754
755
756 /** create a header element used when processing requests like ListRecords/GetRecord/ListIdentifiers
757 */
758 protected Element createHeaderElement(Document doc, String oid, String oailastmodified, boolean deleted) {
759
760 Element header = doc.createElement(OAIXML.HEADER);
761
762 // if deleted, get the date and change oailastmodified to timestamp in oaiinfo
763 if(deleted) {
764 header.setAttribute(OAIXML.OAI_INF_STATUS, OAIXML.HEADER_STATUS_ATTR_DELETED); // set the header status to deleted
765 // then the timestamp for deletion will be from oai-inf database
766 }
767
768 Element identifier = doc.createElement(OAIXML.IDENTIFIER);
769 GSXML.setNodeText(identifier, coll_name + ":" + oid);
770 header.appendChild(identifier);
771 Element set_spec = doc.createElement(OAIXML.SET_SPEC);
772 GSXML.setNodeText(set_spec, coll_name);
773 header.appendChild(set_spec);
774 Element datestamp = doc.createElement(OAIXML.DATESTAMP);
775 GSXML.setNodeText(datestamp, oailastmodified);
776 header.appendChild(datestamp);
777 return header;
778 }
779
780 /** return the metadata information */
781 protected Element processListMetadataFormats(Element req) {
782 // the request sent here must contain an OID. see doListMetadataFormats() in OAIReceptionist
783 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.OID);
784 if (param == null) {
785 logger.error("An element containing the OID attribute not is present.");
786 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
787 }
788 String oid = param.getAttribute(GSXML.VALUE_ATT);
789 if (oid == null || oid.equals("")) {
790 logger.error("No OID is present in the request.");
791 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
792 }
793
794 /*
795 ArrayList<String> oid_list = null;
796 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
797 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
798
799 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
800 oid_list = getChildrenIds(OAIXML.BROWSELIST);
801 }
802 }
803 */
804 // assume meta formats are only for OIDs that have not been deleted
805 // so don't need to check oai-inf db, and can just check collection's index db for list of OIDs
806 ArrayList<String> oid_list = getChildrenIds(OAIXML.BROWSELIST);
807 if (oid_list == null || oid_list.contains(oid) == false) {
808 logger.error("OID: " + oid + " is not present in the database.");
809 Element e= OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
810// logger.error((new XMLConverter()).getPrettyString (e));
811 return e;
812 }
813
814 DBInfo info = null;
815 info = this.coll_db.getInfo(oid);
816 if (info == null) { //just double check
817 return OAIXML.createErrorResponse(OAIXML.OAI_SERVICE_UNAVAILABLE, "");
818 }
819
820 Document doc = XMLConverter.newDOM();
821 Element list_metadata_formats_response = doc.createElement(GSXML.RESPONSE_ELEM);
822
823 Element list_metadata_formats = doc.createElement(OAIXML.LIST_METADATA_FORMATS);
824 list_metadata_formats_response.appendChild(list_metadata_formats);
825 boolean has_meta_format = false;
826
827 // for each format in format_elements_map
828 Iterator<String> it = format_elements_map.keySet().iterator();
829 while (it.hasNext()) {
830 String format = it.next();
831 HashSet<String> set_of_elems = format_elements_map.get(format);
832 if (documentContainsMetadata(info, set_of_elems)) {
833 // add this format into the response
834 has_meta_format = true;
835 list_metadata_formats.appendChild(doc.importNode(format_response_map.get(format), true));
836 }
837 }
838
839 if (has_meta_format == false) {
840 logger.error("Specified metadata names are not contained in the database.");
841 return OAIXML.createErrorResponse(OAIXML.NO_METADATA_FORMATS, "");
842 } else {
843 return list_metadata_formats_response;
844 }
845 }
846
847 protected boolean documentContainsMetadata(DBInfo info, HashSet<String> set_of_elems) {
848 if (set_of_elems.size() == 0) {
849 return false;
850 }
851 Iterator<String> i = set_of_elems.iterator();
852 while (i.hasNext()) {
853 if (!info.getInfo(i.next()).equals("")) {
854 return true;
855 }
856 }
857 return false;
858 }
859
860 /** returns a list of the child ids in order, null if no children */
861 protected ArrayList<String> getChildrenIds(String node_id) {
862 DBInfo info = this.coll_db.getInfo(node_id);
863 if (info == null) {
864 return null;
865 }
866
867 String contains = info.getInfo("contains");
868 if (contains.equals("")) {
869 return null;
870 }
871 ArrayList<String> children = new ArrayList<String>();
872 StringTokenizer st = new StringTokenizer(contains, ";");
873 while (st.hasMoreTokens()) {
874 String child_id = st.nextToken().replaceAll("\"", node_id);
875 children.add(child_id);
876 }
877 return children;
878 }
879 /**method to check whether any of the 'metadata_names' is contained in the 'info'.
880 * The name may be in the form: <name>,<mapped name>, in which the mapped name is
881 * optional. The mapped name is looked up in the DBInfo; if not present, use the first
882 * name which is mandatory.
883 */
884 protected boolean containsMetadata(DBInfo info, String[] metadata_names) {
885 if (metadata_names == null) return false;
886 logger.info("checking metadata names in db.");
887 for(int i=0; i<metadata_names.length; i++) {
888 int index = metadata_names[i].indexOf(",");
889 String meta_name = (index == -1) ? metadata_names[i] :
890 metadata_names[i].substring(index + 1);
891
892 if(info.getInfo(meta_name).equals("") == false) {
893 return true;
894 }
895 }
896 return false;
897 }
898
899 protected long getDateStampMillis(DBInfo info) {
900 // gs.OAIDateStamp is in YYYY-MM-DD
901 String time_stamp = info.getInfo(OAIXML.GS_OAI_DATE_STAMP);
902 long millis = -1;
903 if (!time_stamp.equals("")) {
904 millis = OAIXML.getTime(time_stamp);
905 }
906 if (millis == -1) {
907 // oailastmodified is in seconds
908 time_stamp = info.getInfo(OAIXML.OAI_LASTMODIFIED);
909 if (!time_stamp.equals("")) {
910 millis = Long.parseLong(time_stamp)*1000;
911 }
912 }
913 return millis;
914
915
916 }
917}
918
919
Note: See TracBrowser for help on using the repository browser.