source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/core/OAIReceptionist.java@ 31915

Last change on this file since 31915 was 31915, checked in by ak19, 7 years ago

Dr Bainbridge thought about it and decided that the correct solution is that, since a collection will always have an oai-inf db from now on, the earliest datestamp of a collection should not fall back to either buildconfig's earliestdatestamp field or else buildconfig's lastmodified. However, the latter are used as the publishing date by the RSS service, and so still stored as Collection.java's earliestDatestamp. Now OAICollection has a new additional field, earliestOAIDatestamp which contains the earliest timestamp in oai-inf db. The OAIReceptionist now determines the earliestDatestamp of the entire OAIRepository solely based on the earliestOAIDatestamp values across all OAICollections, also with no fallbacks on Collections' earliestDatestamp or lastModified fields.

File size: 43.6 KB
Line 
1/*
2 * OAIReceptionist.java
3 * Copyright (C) 2012 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20package org.greenstone.gsdl3.core;
21
22import org.greenstone.gsdl3.util.*;
23import org.greenstone.gsdl3.action.*;
24// XML classes
25import org.w3c.dom.Node;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29
30// other java classes
31import java.io.File;
32import java.util.*;
33
34import org.apache.log4j.*;
35
36/** a Receptionist, used for oai metadata response xml generation.
37 * This receptionist talks to the message router directly,
38 * instead of via any action, hence no action map is needed.
39 * @see the basic Receptionist
40 */
41public class OAIReceptionist implements ModuleInterface {
42
43 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.core.OAIReceptionist.class.getName());
44
45 /** Instead of a config_params object, only a site_name is needed by oai receptionist. */
46 protected String site_name = null;
47 /** The unique repository identifier */
48 protected String repository_id = null;
49
50 /** the configure file of this receptionist passed from the oai servlet. */
51 protected Element oai_config = null;
52
53 /** contained in the OAIConfig.xml deciding whether the resumptionToken should be in use */
54 protected int resume_after = -1 ;
55
56 /** the message router that the Receptionist and Actions will talk to */
57 protected ModuleInterface mr = null;
58
59 // Some of the data/responses will not change while the servlet is running, so
60 // we can cache them
61
62 /** A list of all the collections available to this OAI server */
63 protected Element collection_list = null;
64 /** a vector of the names, for convenience */
65 protected Vector<String> collection_name_list = null;
66 /** If this is true, then there are no OAI enabled collections, so can always return noRecordsMatch (after validating the request params) */
67 protected boolean noRecordsMatch = false;
68
69 /** A set of all known 'sets' */
70 protected HashSet<String> set_set = null;
71
72 protected boolean has_super_colls = false;
73 /** a hash of super set-> collection list */
74 protected HashMap<String, Vector<String>> super_coll_map = null;
75 /** store the super coll elements for convenience */
76 HashMap<String, Element> super_coll_data = null;
77 /** store the metadata formats ??????*/
78 /** The identify response */
79 protected Element identify_response = null;
80 /** The list set response */
81 protected Element listsets_response = null;
82 /** the list metadata formats response */
83 protected Element listmetadataformats_response = null;
84
85 public OAIReceptionist() {
86
87 }
88
89 public void cleanUp() {
90 if (this.mr != null) {
91
92 this.mr.cleanUp();
93 }
94 OAIResumptionToken.saveTokensToFile();
95 }
96
97 public void setSiteName(String site_name) {
98 this.site_name = site_name;
99 }
100 /** sets the message router - it should already be created and
101 * configured in the init() of a servlet (OAIServer, for example) before being passed to the receptionist*/
102 public void setMessageRouter(ModuleInterface mr) {
103 this.mr = mr;
104 }
105
106 /** configures the receptionist */
107 public boolean configure(Element config) {
108
109 if (this.mr==null) {
110 logger.error(" message routers must be set before calling oai configure");
111 return false;
112 }
113 if (config == null) {
114 logger.error(" oai configure file is null");
115 return false;
116 }
117 oai_config = config;
118 resume_after = getResumeAfter();
119
120 repository_id = getRepositoryIdentifier();
121 configureSuperSetInfo();
122 if (!configureSetInfo()) {
123 // there are no sets
124 logger.error("No sets (collections) available for OAI");
125 return false;
126 }
127
128 // load in tokens from OAIResumptionToken.xml, and then clear out any
129 // expired ones.
130 OAIResumptionToken.init();
131 OAIResumptionToken.clearExpiredTokens();
132
133 return true;
134 }
135
136 // assuming that sets are static. If collections change then the servlet
137 // should be restarted.
138 private boolean configureSuperSetInfo() {
139 // do we have any super colls listed in web/WEB-INF/classes/OAIConfig.xml?
140 // Will be like
141 // <oaiSuperSet>
142 // <SetSpec>xxx</SetSpec>
143 // <setName>xxx</SetName>
144 // <SetDescription>xxx</setDescription>
145 // </oaiSuperSet>
146 // The super set is listed in OAIConfig, and collections themselves state
147 // whether they are part of the super set or not.
148 NodeList super_coll_list = this.oai_config.getElementsByTagName(OAIXML.OAI_SUPER_SET);
149 this.super_coll_data = new HashMap<String, Element>();
150 if (super_coll_list.getLength() > 0) {
151 this.has_super_colls = true;
152 for (int i=0; i<super_coll_list.getLength(); i++) {
153 Element super_coll = (Element)super_coll_list.item(i);
154 Element set_spec = (Element)GSXML.getChildByTagName(super_coll, OAIXML.SET_SPEC);
155 if (set_spec != null) {
156 String name = GSXML.getNodeText(set_spec);
157 if (!name.equals("")) {
158 this.super_coll_data.put(name, super_coll);
159 logger.info("adding in super coll "+name);
160 }
161 }
162 }
163
164 if (this.super_coll_data.size()==0) {
165 this.has_super_colls = false;
166 }
167 }
168 if (this.has_super_colls == true) {
169 this.super_coll_map = new HashMap<String, Vector<String>>();
170 }
171 return true;
172
173 }
174 private boolean configureSetInfo() {
175 this.set_set = new HashSet<String>();
176
177 // First, we get a list of all the OAI enabled collections
178 // We get this by sending a listSets request to the MR
179 Document doc = XMLConverter.newDOM();
180 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
181
182 Element request = GSXML.createBasicRequest(doc, OAIXML.OAI_SET_LIST, "", null);
183 message.appendChild(request);
184 Node msg_node = mr.process(message);
185
186 if (msg_node == null) {
187 logger.error("returned msg_node from mr is null");
188 return false;
189 }
190 Element resp = (Element)GSXML.getChildByTagName(msg_node, GSXML.RESPONSE_ELEM);
191 Element coll_list = (Element)GSXML.getChildByTagName(resp, GSXML.COLLECTION_ELEM + GSXML.LIST_MODIFIER);
192 if (coll_list == null) {
193 logger.error("coll_list is null");
194 return false;
195 }
196
197 this.collection_list = (Element)doc.importNode(coll_list, true);
198
199 // go through and store a list of collection names for convenience
200 // also create a 'to' attribute for the next request to the MR, which
201 // is a ListSets request to each collection
202 Node child = this.collection_list.getFirstChild();
203 if (child == null) {
204 logger.error("collection list has no children");
205 noRecordsMatch = true;
206 return false;
207 }
208
209 this.collection_name_list = new Vector<String>();
210 StringBuffer to = new StringBuffer();
211 boolean first = true;
212 while (child != null) {
213 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
214 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
215 this.collection_name_list.add(coll_id);
216 if (!first) {
217 to.append(',');
218 }
219 first = false;
220 to.append(coll_id+"/"+OAIXML.LIST_SETS);
221 }
222 child = child.getNextSibling();
223 }
224 if (first) {
225 // we haven't found any collections
226 logger.error("found no collection elements in collectionList");
227 noRecordsMatch = true;
228 return false;
229 }
230 Document listsets_doc = XMLConverter.newDOM();
231 Element listsets_element = listsets_doc.createElement(OAIXML.LIST_SETS);
232 this.listsets_response = getMessage(listsets_doc, listsets_element);
233
234 // Now, for each collection, get a list of all its sets
235 // might include subsets (classifiers) or super colls
236 // We'll reuse the first message, changing its type and to atts
237 request.setAttribute(GSXML.TYPE_ATT, "");
238 request.setAttribute(GSXML.TO_ATT, to.toString());
239 // send to MR
240 msg_node = mr.process(message);
241 //logger.info("*** " + XMLConverter.getPrettyString(msg_node));
242 NodeList response_list = ((Element)msg_node).getElementsByTagName(GSXML.RESPONSE_ELEM);
243 for (int c=0; c<response_list.getLength(); c++) {
244 // for each collection's response
245 Element response = (Element)response_list.item(c);
246 String coll_name = GSPath.getFirstLink(response.getAttribute(GSXML.FROM_ATT));
247 logger.info("*** coll from response "+coll_name);
248 NodeList set_list = response.getElementsByTagName(OAIXML.SET);
249 for (int j=0; j<set_list.getLength(); j++) {
250 // now check if it a super collection
251 Element set = (Element)set_list.item(j);
252 String set_spec = GSXML.getNodeText((Element)GSXML.getChildByTagName(set, OAIXML.SET_SPEC));
253 logger.info("*** set spec = "+set_spec);
254 // this may change if we add site name back in
255 // setSpecs will be collname or collname:subset or supercollname
256 if (set_spec.indexOf(":")==-1 && ! set_spec.equals(coll_name)) {
257 // it must be a super coll spec
258 logger.info("*** found super coll, "+set_spec);
259 // check that it is a valid one from config
260 if (this.has_super_colls == true && this.super_coll_data.containsKey(set_spec)) {
261 Vector <String> subcolls = this.super_coll_map.get(set_spec);
262 if (subcolls == null) {
263 logger.info("*** its new!!");
264 // not in there yet
265 subcolls = new Vector<String>();
266 this.set_set.add(set_spec);
267 this.super_coll_map.put(set_spec, subcolls);
268 // the first time a supercoll is mentioned, add into the set list
269 logger.info("*** finding the set info "+XMLConverter.getPrettyString(this.super_coll_data.get(set_spec)));
270 listsets_element.appendChild(GSXML.duplicateWithNewName(listsets_doc, this.super_coll_data.get(set_spec), OAIXML.SET, true));
271 }
272 // add this collection to the list for the super coll
273 subcolls.add(coll_name);
274 }
275 } else { // its either the coll itself or a subcoll
276 // add in the set
277 listsets_element.appendChild(listsets_doc.importNode(set, true));
278 this.set_set.add(set_spec);
279 }
280 } // for each set in the collection
281 } // for each OAI enabled collection
282 return true;
283 }
284
285 protected void resetMessageRouter() {
286 // we just need to send a configure request to MR
287 Document doc = XMLConverter.newDOM();
288 Element mr_request_message = doc.createElement(GSXML.MESSAGE_ELEM);
289 Element mr_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_SYSTEM, "", null);
290 mr_request_message.appendChild(mr_request);
291
292 Element system = doc.createElement(GSXML.SYSTEM_ELEM);
293 mr_request.appendChild(system);
294 system.setAttribute(GSXML.TYPE_ATT, GSXML.SYSTEM_TYPE_CONFIGURE);
295
296 Element response = (Element) this.mr.process(mr_request_message);
297 logger.info("*** configure response = "+XMLConverter.getPrettyString(response));
298 }
299 /** process using strings - just calls process using Elements */
300 public String process(String xml_in) {
301
302 Node message_node = XMLConverter.getDOM(xml_in);
303 Node page = process(message_node);
304 return XMLConverter.getString(page);
305 }
306
307 //Compose a message/response element used to send back to the OAIServer servlet.
308 //This method is only used within OAIReceptionist
309 private Element getMessage(Document doc, Element e) {
310 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
311 Element response = doc.createElement(GSXML.RESPONSE_ELEM);
312 msg.appendChild(response);
313 response.appendChild(e);
314 return msg;
315 }
316
317 /** process - produce xml data in response to a request
318 * if something goes wrong, it returns null -
319 */
320 public Node process(Node message_node) {
321 logger.info("*** OAIReceptionist received request");
322
323 Element message = GSXML.nodeToElement(message_node);
324 logger.info("*** " + XMLConverter.getString(message));
325
326 // check that its a correct message tag
327 if (!message.getTagName().equals(GSXML.MESSAGE_ELEM)) {
328 logger.error(" Invalid message. GSDL message should start with <"+GSXML.MESSAGE_ELEM+">, instead it starts with:"+message.getTagName()+".");
329 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
330 }
331
332 // get the request out of the message - assume that there is only one
333 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
334 if (request == null) {
335 logger.error(" message had no request!");
336 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
337 }
338
339 // special case, reset=true for reloading the MR and recept data
340 String reset = request.getAttribute("reset");
341 if (!reset.equals("")) {
342 resetMessageRouter();
343 configureSetInfo();
344 return OAIXML.createResetResponse(true);
345 }
346
347
348 //At this stage, the value of 'to' attribute of the request must be the 'verb'
349 //The only thing that the oai receptionist can be sure is that these verbs are valid, nothing else.
350 String verb = request.getAttribute(GSXML.TO_ATT);
351 if (verb.equals(OAIXML.IDENTIFY)) {
352 return doIdentify();
353 }
354 if (verb.equals(OAIXML.LIST_METADATA_FORMATS)) {
355 return doListMetadataFormats(request);
356 }
357 if (verb.equals(OAIXML.LIST_SETS)) {
358 // we have composed the list sets response on init
359 // Note this means that list sets never uses resumption tokens
360 return this.listsets_response;
361 }
362 if (verb.equals(OAIXML.GET_RECORD)) {
363 return doGetRecord(request);
364 }
365 if (verb.equals(OAIXML.LIST_IDENTIFIERS)) {
366 return doListIdentifiersOrRecords(request,OAIXML.LIST_IDENTIFIERS , OAIXML.HEADER);
367 }
368 if (verb.equals(OAIXML.LIST_RECORDS)) {
369 return doListIdentifiersOrRecords(request, OAIXML.LIST_RECORDS, OAIXML.RECORD);
370 }
371 // should never get here as verbs were checked in OAIServer
372 return OAIXML.createErrorMessage(OAIXML.BAD_VERB, "Unexpected things happened");
373
374 }
375
376
377 private int getResumeAfter() {
378 Element resume_after = (Element)GSXML.getChildByTagName(oai_config, OAIXML.RESUME_AFTER);
379 if(resume_after != null) return Integer.parseInt(GSXML.getNodeText(resume_after));
380 return -1;
381 }
382 private String getRepositoryIdentifier() {
383 Element ri = (Element)GSXML.getChildByTagName(oai_config, OAIXML.REPOSITORY_IDENTIFIER);
384 if (ri != null) {
385 return GSXML.getNodeText(ri);
386 }
387 return "";
388 }
389
390
391 /** if the param_map contains strings other than those in valid_strs, return false;
392 * otherwise true.
393 */
394 private boolean areAllParamsValid(HashMap<String, String> param_map, HashSet<String> valid_strs) {
395 ArrayList<String> param_list = new ArrayList<String>(param_map.keySet());
396 for(int i=0; i<param_list.size(); i++) {
397 logger.info("*** param, key = "+param_list.get(i)+", value = "+param_map.get(param_list.get(i)));
398 if (valid_strs.contains(param_list.get(i)) == false) {
399 return false;
400 }
401 }
402 return true;
403 }
404
405 private Element doListIdentifiersOrRecords(Element req, String verb, String record_type) {
406 // options: from, until, set, metadataPrefix, resumptionToken
407 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
408 HashSet<String> valid_strs = new HashSet<String>();
409 valid_strs.add(OAIXML.FROM);
410 valid_strs.add(OAIXML.UNTIL);
411 valid_strs.add(OAIXML.SET);
412 valid_strs.add(OAIXML.METADATA_PREFIX);
413 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
414
415 Document result_doc = XMLConverter.newDOM();
416 Element result_element = result_doc.createElement(verb);
417 boolean result_token_needed = false; // does this result need to include a
418 // resumption token
419
420 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
421
422 HashMap<String, String> param_map = GSXML.getParamMap(params);
423
424 // are all the params valid?
425 if (!areAllParamsValid(param_map, valid_strs)) {
426 logger.error("One of the params is invalid");
427 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "There was an invalid parameter");
428 // TODO, need to tell the user which one was invalid ??
429 }
430
431 // Do we have a resumption token??
432 String token = null;
433 String from = null;
434 String until = null;
435 boolean set_requested = false;
436 String set_spec_str = null;
437 String prefix_value = null;
438 int cursor = 0;
439 int current_cursor = 0;
440 String current_set = null;
441 long initial_time = 0;
442
443 int total_size = -1; // we are only going to set this in resumption
444 // token if it is easy to work out, i.e. not sending extra requests to
445 // MR just to calculate total size
446
447 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
448 // Is it an error to have other arguments? Do we need to check to make sure that resumptionToken is the only arg??
449 // validate resumptionToken
450 token = param_map.get(OAIXML.RESUMPTION_TOKEN);
451 logger.info("has resumptionToken " + token);
452 if(OAIResumptionToken.isValidToken(token) == false) {
453 logger.error("token is not valid");
454 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "");
455 }
456 result_token_needed = true; // we always need to send a token back if we have started with one. It may be empty if we are returning the end of the list
457 // initialise the request params from the stored token data
458 HashMap<String, String> token_data = OAIResumptionToken.getTokenData(token);
459 from = token_data.get(OAIXML.FROM);
460 until = token_data.get(OAIXML.UNTIL);
461 set_spec_str = token_data.get(OAIXML.SET);
462 if (set_spec_str != null) {
463 set_requested = true;
464 }
465 prefix_value = token_data.get(OAIXML.METADATA_PREFIX);
466 current_set = token_data.get(OAIResumptionToken.CURRENT_SET);
467 try {
468 cursor = Integer.parseInt(token_data.get(OAIXML.CURSOR));
469 cursor = cursor + resume_after; // increment cursor
470 current_cursor = Integer.parseInt(token_data.get(OAIResumptionToken.CURRENT_CURSOR));
471 initial_time = Long.parseLong(token_data.get(OAIResumptionToken.INITIAL_TIME));
472 } catch (NumberFormatException e) {
473 logger.error("tried to parse int from cursor data and failed");
474 }
475
476 // check that the collections/sets haven't changed since the token was issued
477 if (collectionsChangedSinceTime(set_spec_str, initial_time)) {
478 logger.error("one of the collections in set "+set_spec_str+" has changed since token issued. Expiring the token");
479 OAIResumptionToken.expireToken(token);
480 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "Repository data has changed since this token was issued. Resend original request");
481 }
482 }
483 else {
484 // no resumption token, lets check the other params
485 // there must be a metadataPrefix
486 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
487 logger.error("metadataPrefix param required");
488 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "metadataPrefix param required");
489 }
490
491 //if there are any date params, check they're of the right format
492 Date from_date = null;
493 Date until_date = null;
494
495 from = param_map.get(OAIXML.FROM);
496 if(from != null) {
497 from_date = OAIXML.getDate(from);
498 if(from_date == null) {
499 logger.error("invalid date: " + from);
500 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.FROM);
501 }
502 }
503 until = param_map.get(OAIXML.UNTIL);
504 if(until != null) {
505 until_date = OAIXML.getDate(until);
506 if(until_date == null) {
507 logger.error("invalid date: " + until);
508 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.UNTIL);
509 }
510 }
511
512 if(from != null && until != null) { // check they are of the same date-time format (granularity)
513 if(from.length() != until.length()) {
514 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
515 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "The request has different granularities (date-time formats) for the From and Until date parameters.");
516 }
517
518 if(from_date.compareTo(until_date) > 0) { // from date can't be later than until date
519 return OAIXML.createErrorMessage(OAIXML.NO_RECORDS_MATCH, "");
520 }
521 }
522
523 if(until_date != null) {
524
525 // Also call until_date.compareTo(earliestdatestamp) as the until date can't precede the earliest timestamp
526 // Unfortunately, this test has to be done after the granularity test
527 // compareTo() returns the value 0 if the argument Date is equal to this Date; a value less than 0 if this Date is before
528 // the Date argument; and a value greater than 0 if this Date is after the Date argument.
529 long earliestDatestamp = getEarliestDateStamp(collection_list);
530 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
531 Date earliestDatestamp_date = OAIXML.getDate(earliestDatestamp_str);
532
533 if(until_date.compareTo(earliestDatestamp_date) < 0) {
534 return OAIXML.createErrorMessage(OAIXML.NO_RECORDS_MATCH, "");
535 }
536 }
537
538
539 // check the set arg is a set we know about
540 set_requested = param_map.containsKey(OAIXML.SET);
541 set_spec_str = null;
542 if(set_requested == true) {
543 set_spec_str = param_map.get(OAIXML.SET);
544 if (!this.set_set.contains(set_spec_str)) {
545 // the set is not one we know about
546 logger.error("requested set is not found in this repository");
547 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid set parameter");
548
549 }
550 }
551 // Is the metadataPrefix arg one this repository supports?
552 prefix_value = param_map.get(OAIXML.METADATA_PREFIX);
553 if (repositorySupportsMetadataPrefix(prefix_value) == false) {
554 logger.error("requested metadataPrefix is not found in OAIConfig.xml");
555 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "metadata format "+prefix_value+" not supported by this repository");
556 }
557
558 } // else no resumption token, check other params
559
560 // Whew. Now we have validated the params, we can work on doing the actual
561 // request
562
563
564 Document doc = XMLConverter.newDOM();
565 Element mr_msg = doc.createElement(GSXML.MESSAGE_ELEM);
566 Element mr_req = doc.createElement(GSXML.REQUEST_ELEM);
567 // TODO does this need a type???
568 mr_msg.appendChild(mr_req);
569
570 // copy in the from/until params if there
571 if (from != null) {
572 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.FROM, from));
573 }
574 if (until != null) {
575 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.UNTIL, until));
576 }
577 // add metadataPrefix
578 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.METADATA_PREFIX, prefix_value));
579
580 // do we have a set???
581 // if no set, we send to all collections in the collection list
582 // if super set, we send to all collections in super set list
583 // if a single collection, send to it
584 // if a subset, send to the collection
585 Vector<String> current_coll_list = getCollectionListForSet(set_spec_str);
586 boolean single_collection = false;
587 if (current_coll_list.size() == 1) {
588 single_collection = true;
589 }
590 if (set_spec_str != null && set_spec_str.indexOf(":") != -1) {
591 // we have a subset - add the set param back in
592 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.SET, set_spec_str));
593 }
594
595 int num_collected_records = 0;
596 int start_point = current_cursor; // may not be 0 if we are using a resumption token
597 String resumption_collection = "";
598 boolean empty_result_token = false; // if we are sending the last part of a list, then the token value will be empty
599
600 // iterate through the list of collections and send the request to each
601
602 int start_coll=0;
603 if (current_set != null) {
604 // we are resuming a previous request, need to locate the first collection
605 for (int i=0; i<current_coll_list.size(); i++) {
606 if (current_set.equals(current_coll_list.get(i))) {
607 start_coll = i;
608 break;
609 }
610 }
611 }
612
613 for (int i=start_coll; i<current_coll_list.size(); i++) {
614 String current_coll = current_coll_list.get(i);
615 mr_req.setAttribute(GSXML.TO_ATT, current_coll+"/"+verb);
616
617 Element result = (Element)mr.process(mr_msg);
618 logger.info("*** " + verb+ " result for coll "+current_coll);
619 logger.info("*** " + XMLConverter.getPrettyString(result));
620 if (result == null) {
621 logger.info("message router returns null");
622 // do what??? carry on? fail??
623 return OAIXML.createErrorMessage("Internal service returns null", "");
624 }
625 Element res = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
626 if(res == null) {
627 logger.info("response element in xml_result is null");
628 return OAIXML.createErrorMessage("Internal service returns null", "");
629 }
630 NodeList record_list = res.getElementsByTagName(record_type);
631 int num_records = record_list.getLength();
632 if(num_records == 0) {
633 logger.info("message router returns 0 records for coll "+current_coll);
634 continue; // try the next collection
635 }
636 if (single_collection) {
637 total_size = num_records;
638 }
639 int records_to_add = (resume_after > 0 ? resume_after - num_collected_records : num_records);
640 if (records_to_add > (num_records-start_point)) {
641 records_to_add = num_records-start_point;
642 }
643 addRecordsToList(result_doc, result_element, record_list, start_point, records_to_add);
644 num_collected_records += records_to_add;
645
646 // do we need to stop here, and do we need to issue a resumption token?
647 if (resume_after > 0 && num_collected_records == resume_after) {
648 // we have finished collecting records at the moment.
649 // but are we conincidentally at the end? or are there more to go?
650 if (records_to_add < (num_records - start_point)) {
651 // we have added less than this collection had
652 start_point += records_to_add;
653 resumption_collection = current_coll;
654 result_token_needed = true;
655 }
656 else {
657 // we added all this collection had to offer
658 // is there another collection in the list??
659 if (i<current_coll_list.size()-1) {
660 result_token_needed = true;
661 start_point = 0;
662 resumption_collection = current_coll_list.get(i+1);
663 }
664 else {
665 // we have finished one collection and there are no more collection
666 // if we need to send a resumption token (in this case, only because we started with one, then it will be empty
667 logger.info("*** at end of list, need empty result token");
668 empty_result_token = true;
669 }
670 }
671 break;
672 }
673 start_point = 0; // only the first one will have start non-zero, if we
674 // have a resumption token
675
676 } // for each collection
677
678 if (num_collected_records ==0) {
679 // there were no matching results
680 return OAIXML.createErrorMessage(OAIXML.NO_RECORDS_MATCH, "");
681 }
682
683 if (num_collected_records < resume_after) {
684 // we have been through all collections, and there are no more
685 // if we need a result token - only because we started with one, so we need to send an empty one, then make sure everyone knows we are just sending an empty one
686 if (result_token_needed) {
687 empty_result_token = true;
688 }
689 }
690
691 if (result_token_needed) {
692 // we need a resumption token
693 if (empty_result_token) {
694 logger.info("*** have empty result token");
695 token = "";
696 } else {
697 if (token != null) {
698 // we had a token for this request, we can just update it
699 token = OAIResumptionToken.updateToken(token, ""+cursor, resumption_collection, ""+start_point);
700 } else {
701 // we are generating a new one
702 token = OAIResumptionToken.createAndStoreResumptionToken(set_spec_str, prefix_value, from, until, ""+cursor, resumption_collection, ""+start_point );
703 }
704 }
705
706 // result token XML
707 long expiration_date = -1;
708 if (empty_result_token) {
709 // we know how many records in total as we have sent them all
710 total_size = cursor+num_collected_records;
711 } else {
712 // non-empty token, set the expiration date
713 expiration_date = OAIResumptionToken.getExpirationDate(token);
714 }
715 Element token_elem = OAIXML.createResumptionTokenElement(result_doc, token, total_size, cursor, expiration_date);
716 // OAIXML.addToken(token_elem); // store it
717 result_element.appendChild(token_elem); // add to the result
718 }
719
720
721 return getMessage(result_doc, result_element);
722 }
723
724 private Vector<String> getCollectionListForSet(String set) {
725 if (set == null) {
726 // no set requested, need the complete collection list
727 return this.collection_name_list;
728 }
729 if (has_super_colls && super_coll_map.containsKey(set)) {
730 return super_coll_map.get(set);
731 }
732
733 Vector<String> coll_list = new Vector<String>();
734 if (set.indexOf(":") != -1) {
735 String col_name = set.substring(0, set.indexOf(":"));
736 coll_list.add(col_name);
737 }
738 else {
739 coll_list.add(set);
740 }
741 return coll_list;
742 }
743 private void addRecordsToList(Document doc, Element result_element, NodeList
744 record_list, int start_point, int num_records) {
745 int end_point = start_point + num_records;
746 for (int i=start_point; i<end_point; i++) {
747 result_element.appendChild(doc.importNode(record_list.item(i), true));
748 }
749 }
750
751 private Element collectAll(Element result, Element msg, String verb, String elem_name) {
752 if(result == null) {
753 //in the first round, result is null
754 return msg;
755 }
756 Element res_in_result = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
757 if(res_in_result == null) { // return the results of all other collections accumulated so far
758 return msg;
759 }
760 Element verb_elem = (Element)GSXML.getChildByTagName(res_in_result, verb);
761 if(msg == null) {
762 return result;
763 }
764
765 //e.g., get all <record> elements from the returned message. There may be none of
766 //such element, for example, the collection service returned an error message
767 NodeList elem_list = msg.getElementsByTagName(elem_name);
768
769 for (int i=0; i<elem_list.getLength(); i++) {
770 verb_elem.appendChild(res_in_result.getOwnerDocument().importNode(elem_list.item(i), true));
771 }
772 return result;
773 }
774
775
776 /** there are three possible exception conditions: bad argument, idDoesNotExist, and noMetadataFormat.
777 * The first one is handled here, and the last two are processed by OAIPMH.
778 */
779 private Element doListMetadataFormats(Element req) {
780 //if the verb is ListMetadataFormats, there could be only one parameter: identifier
781 //, or there is no parameter; otherwise it is an error
782 //logger.info("" + XMLConverter.getString(msg));
783
784 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
785 Element param = null;
786 Document lmf_doc = XMLConverter.newDOM();
787 if(params.getLength() == 0) {
788 //this is requesting metadata formats for the whole repository
789 //read the oaiConfig.xml file, return the metadata formats specified there.
790 if (this.listmetadataformats_response != null) {
791 // we have already created it
792 return this.listmetadataformats_response;
793 }
794
795 Element list_metadata_formats = lmf_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
796 // get all the formats out of oai_config
797 NodeList formats = oai_config.getElementsByTagName(OAIXML.METADATA_FORMAT);
798 if (formats.getLength() ==0) {
799 logger.error("OAIConfig.xml must contain the supported metadata formats");
800 // TODO this is internal error, what to do???
801 return getMessage(lmf_doc, list_metadata_formats);
802 }
803
804 for(int i=0; i<formats.getLength(); i++) {
805 Element f = OAIXML.getMetadataFormatShort(lmf_doc, (Element)formats.item(i));
806 list_metadata_formats.appendChild(f);
807 }
808 this.listmetadataformats_response = getMessage(lmf_doc, list_metadata_formats);
809 return this.listmetadataformats_response;
810
811 }
812
813 if (params.getLength() > 1) {
814 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
815 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
816 }
817
818 // This is a request for the metadata of a particular item with an identifier
819 /**the request xml is in the form: <request>
820 * <param name=.../>
821 * </request>
822 *And there is a param element and one element only. (No paramList element in between).
823 */
824 param = (Element)params.item(0);
825 String param_name = param.getAttribute(GSXML.NAME_ATT);
826 String identifier = "";
827 if (!param_name.equals(OAIXML.IDENTIFIER)) {
828 //Bad argument
829 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
830 }
831
832 identifier = param.getAttribute(GSXML.VALUE_ATT);
833 // the identifier is in the form: <coll_name>:<OID>
834 // so it must contain at least one ':' characters
835 // (the oid itself may contain : chars)
836 String[] strs = identifier.split(":", 2);
837 if(strs.length != 2) {
838 logger.error("identifier is not in the form coll:id" + identifier);
839 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
840 }
841
842 // send request to message router
843 // get the names
844 String coll_name = strs[0];
845 String oid = strs[1];
846
847 Document msg_doc = XMLConverter.newDOM();
848 Element message = msg_doc.createElement(GSXML.MESSAGE_ELEM);
849 String verb = req.getAttribute(GSXML.TO_ATT);
850 String new_to = coll_name + "/" + verb;
851 Element request = GSXML.createBasicRequest(msg_doc, "oai???", new_to, null);
852 message.appendChild(request);
853 // add the id param
854 GSXML.addParameterToList(request, OAIXML.OID, oid);
855
856 //Now send the request to the message router to process
857 Node result_node = mr.process(message);
858 return GSXML.nodeToElement(result_node);
859 }
860
861 private void copyNamedElementfromConfig(Element to_elem, String element_name) {
862 Element original_element = (Element)GSXML.getChildByTagName(oai_config, element_name);
863 if(original_element != null) {
864 GSXML.copyNode(to_elem, original_element);
865 }
866 }
867
868
869 private Element doIdentify() {
870 //The validation for this verb has been done in OAIServer.validate(). So no bother here.
871 logger.info("");
872 if (this.identify_response != null) {
873 // we have already created it
874 return getMessage(this.identify_response.getOwnerDocument(), this.identify_response);
875 }
876 Document doc = XMLConverter.newDOM();
877 Element identify = doc.createElement(OAIXML.IDENTIFY);
878 //do the repository name
879 copyNamedElementfromConfig(identify, OAIXML.REPOSITORY_NAME);
880 //do the baseurl
881 copyNamedElementfromConfig(identify, OAIXML.BASE_URL);
882 //do the protocol version
883 copyNamedElementfromConfig(identify, OAIXML.PROTOCOL_VERSION);
884
885 //There can be more than one admin email according to the OAI specification
886 NodeList admin_emails = GSXML.getChildrenByTagName(oai_config, OAIXML.ADMIN_EMAIL);
887 int num_admin = 0;
888 Element from_admin_email = null;
889 if (admin_emails != null) {
890 num_admin = admin_emails.getLength();
891 }
892 for (int i=0; i<num_admin; i++) {
893 GSXML.copyNode(identify, admin_emails.item(i));
894 }
895
896 //do the earliestDatestamp
897 //send request to mr to search through the earliest datestamp amongst all oai collections in the repository.
898 //ask the message router for a list of oai collections
899 //NodeList oai_coll = getOAICollectionList();
900 long earliestDatestamp = getEarliestDateStamp(collection_list);
901 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
902 Element earliestDatestamp_elem = doc.createElement(OAIXML.EARLIEST_DATESTAMP);
903 GSXML.setNodeText(earliestDatestamp_elem, earliestDatestamp_str);
904 identify.appendChild(earliestDatestamp_elem);
905
906 //do the deletedRecord
907 copyNamedElementfromConfig(identify, OAIXML.DELETED_RECORD);
908 //do the granularity
909 copyNamedElementfromConfig(identify, OAIXML.GRANULARITY);
910
911 // output the oai identifier
912 Element description = doc.createElement(OAIXML.DESCRIPTION);
913 identify.appendChild(description);
914 // TODO, make this a valid id
915 Element oaiIdentifier = OAIXML.createOAIIdentifierXML(doc, repository_id, "lucene-jdbm-demo", "ec159e");
916 description.appendChild(oaiIdentifier);
917
918 // if there are any oaiInfo metadata, add them in too.
919 Element info = (Element)GSXML.getChildByTagName(oai_config, OAIXML.OAI_INFO);
920 if (info != null) {
921 NodeList meta = GSXML.getChildrenByTagName(info, OAIXML.METADATA);
922 if (meta != null && meta.getLength() > 0) {
923 Element gsdl = OAIXML.createGSDLElement(doc);
924 description.appendChild(gsdl);
925 for (int m = 0; m<meta.getLength(); m++) {
926 GSXML.copyNode(gsdl, meta.item(m));
927 }
928
929 }
930 }
931 this.identify_response = identify;
932 return getMessage(doc, identify);
933 }
934 /** split the identifier into <collection + OID> as an array
935 It has already been checked that the 'identifier' contains at least one ':'
936 */
937
938 /** validate if the specified metadata prefix value is supported by the repository
939 * by checking it in the OAIConfig.xml
940 */
941 private boolean repositorySupportsMetadataPrefix(String prefix_value) {
942 NodeList prefix_list = oai_config.getElementsByTagName(OAIXML.METADATA_PREFIX);
943
944 for(int i=0; i<prefix_list.getLength(); i++) {
945 if(prefix_value.equals(GSXML.getNodeText((Element)prefix_list.item(i)).trim() )) {
946 return true;
947 }
948 }
949 return false;
950 }
951 private Element doGetRecord(Element req){
952 logger.info("");
953 /** arguments:
954 identifier: required
955 metadataPrefix: required
956 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
957 */
958 Document doc = XMLConverter.newDOM();
959 Element get_record = doc.createElement(OAIXML.GET_RECORD);
960
961 HashSet<String> valid_strs = new HashSet<String>();
962 valid_strs.add(OAIXML.IDENTIFIER);
963 valid_strs.add(OAIXML.METADATA_PREFIX);
964
965 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
966 HashMap<String, String> param_map = GSXML.getParamMap(params);
967
968 if(!areAllParamsValid(param_map, valid_strs) ||
969 params.getLength() == 0 ||
970 param_map.containsKey(OAIXML.IDENTIFIER) == false ||
971 param_map.containsKey(OAIXML.METADATA_PREFIX) == false ) {
972 logger.error("must have the metadataPrefix/identifier parameter.");
973 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
974 }
975
976 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
977 String identifier = param_map.get(OAIXML.IDENTIFIER);
978
979 // verify the metadata prefix
980 if (repositorySupportsMetadataPrefix(prefix) == false) {
981 logger.error("requested prefix is not found in OAIConfig.xml");
982 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
983 }
984
985 // get the names
986 String[] strs = identifier.split(":", 2);
987 if(strs == null || strs.length < 2) {
988 logger.error("identifier is not in the form coll:id" + identifier);
989 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
990 }
991 //String name_of_site = strs[0];
992 String coll_name = strs[0];
993 String oid = strs[1];
994
995 //re-organize the request element
996 // reset the 'to' attribute
997 String verb = req.getAttribute(GSXML.TO_ATT);
998 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
999 // reset the identifier element
1000 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.IDENTIFIER);
1001 if (param != null) {
1002 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
1003 param.setAttribute(GSXML.VALUE_ATT, oid);
1004 }
1005
1006 //Now send the request to the message router to process
1007 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
1008 msg.appendChild(doc.importNode(req, true));
1009 Node result_node = mr.process(msg);
1010 return GSXML.nodeToElement(result_node);
1011 }
1012
1013 // See OAIConfig.xml
1014 // dynamically works out what the earliestDateStamp is, since it varies by collection
1015 // returns this time in *milliseconds*.
1016 protected long getEarliestDateStamp(Element oai_coll_list) {
1017 // config earliest datstamp
1018 long config_datestamp = 0;
1019 Element config_datestamp_elem = (Element)GSXML.getChildByTagName(this.oai_config, OAIXML.EARLIEST_DATESTAMP);
1020 if (config_datestamp_elem != null) {
1021 String datest = GSXML.getNodeText(config_datestamp_elem);
1022 config_datestamp = OAIXML.getTime(datest);
1023 if (config_datestamp == -1) {
1024 config_datestamp = 0;
1025 }
1026 }
1027 //do the earliestDatestamp
1028 long current_time = System.currentTimeMillis();
1029 long earliestDatestamp = current_time;
1030 NodeList oai_coll = oai_coll_list.getElementsByTagName(GSXML.COLLECTION_ELEM);
1031 int oai_coll_size = oai_coll.getLength();
1032 if (oai_coll_size == 0) {
1033 logger.info("returned oai collection list is empty. Setting repository earliestDatestamp to be the earliest datestamp from OAIConfig.xml, or 1970-01-01 if not specified.");
1034 return config_datestamp;
1035 }
1036 // the earliestDatestamp is now stored as a metadata element in the collection's buildConfig.xml file
1037 // we get the earliestDatestamp among the collections
1038 for(int i=0; i<oai_coll_size; i++) {
1039 String collName = collection_name_list.get(i);
1040 long coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.EARLIEST_OAI_DATESTAMP)); // Taken from oai-inf db's OAI_EARLIEST_TIMESTAMP_OID entry, -1 if not found
1041
1042 if (coll_earliestDatestamp > 0 && earliestDatestamp > coll_earliestDatestamp) {
1043 earliestDatestamp = coll_earliestDatestamp;
1044 //logger.info("@@@ Found earlier timestamp: " + earliestDatestamp + " ms");
1045 }
1046 }
1047
1048 // we're no longer trying fallbacks for earliestDatestamp (other than the extreme fallback of
1049 // unix epoch time) because, going forward, all collections will have oai-inf db containing
1050 // an entry for earliesttimestamp. And all OAICollections will moreover have them stored and
1051 // will return them upon calling getEarliestOAIDatestamp().
1052 /*
1053 if(earliestDatestamp == current_time) {
1054 logger.info("Can't determine earliesttimestamp from oai-inf.db for any OAI collection. Trying timestamps in build config...");
1055 for(int i=0; i<oai_coll_size; i++) {
1056 String collName = collection_name_list.get(i);
1057 long coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.EARLIEST_DATESTAMP)); // Taken from the earliest datestamp field in buildcfg
1058 if (coll_earliestDatestamp == 0) {
1059 // try last modified
1060 coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.LAST_MODIFIED));
1061 //logger.info("@@@ Falling back to using collection " + collName + "'s lastmodified date as its earliest timestamp: " + coll_earliestDatestamp);
1062 }
1063 if (coll_earliestDatestamp > 0) {
1064 earliestDatestamp = (earliestDatestamp > coll_earliestDatestamp)? coll_earliestDatestamp : earliestDatestamp;
1065 }
1066 }
1067 }
1068 */
1069
1070 if (earliestDatestamp == current_time) {
1071 logger.info("no collection had a real datestamp, using value from OAIConfig");
1072 return config_datestamp;
1073 }
1074 return earliestDatestamp;
1075 }
1076
1077 private boolean collectionsChangedSinceTime(String set_spec_str, long initial_time) {
1078
1079 // we need to look though all collections in the set to see if any have last modified dates > initial_time
1080 Vector<String> set_coll_list = getCollectionListForSet(set_spec_str);
1081
1082 Node child = this.collection_list.getFirstChild();
1083 while (child != null) {
1084 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
1085 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
1086 if (set_coll_list.contains(coll_id)) {
1087 long last_modified = Long.parseLong(((Element)child).getAttribute(OAIXML.LAST_MODIFIED));
1088 if (initial_time < last_modified) {
1089 return true;
1090 }
1091 }
1092 }
1093 child = child.getNextSibling();
1094 }
1095 return false;
1096
1097 }
1098
1099}
1100
1101
Note: See TracBrowser for help on using the repository browser.