source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/core/OAIReceptionist.java@ 31913

Last change on this file since 31913 was 31913, checked in by ak19, 7 years ago

Committing useful debug statement, but commented out.

File size: 42.6 KB
Line 
1/*
2 * OAIReceptionist.java
3 * Copyright (C) 2012 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20package org.greenstone.gsdl3.core;
21
22import org.greenstone.gsdl3.util.*;
23import org.greenstone.gsdl3.action.*;
24// XML classes
25import org.w3c.dom.Node;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29
30// other java classes
31import java.io.File;
32import java.util.*;
33
34import org.apache.log4j.*;
35
36/** a Receptionist, used for oai metadata response xml generation.
37 * This receptionist talks to the message router directly,
38 * instead of via any action, hence no action map is needed.
39 * @see the basic Receptionist
40 */
41public class OAIReceptionist implements ModuleInterface {
42
43 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.core.OAIReceptionist.class.getName());
44
45 /** Instead of a config_params object, only a site_name is needed by oai receptionist. */
46 protected String site_name = null;
47 /** The unique repository identifier */
48 protected String repository_id = null;
49
50 /** the configure file of this receptionist passed from the oai servlet. */
51 protected Element oai_config = null;
52
53 /** contained in the OAIConfig.xml deciding whether the resumptionToken should be in use */
54 protected int resume_after = -1 ;
55
56 /** the message router that the Receptionist and Actions will talk to */
57 protected ModuleInterface mr = null;
58
59 // Some of the data/responses will not change while the servlet is running, so
60 // we can cache them
61
62 /** A list of all the collections available to this OAI server */
63 protected Element collection_list = null;
64 /** a vector of the names, for convenience */
65 protected Vector<String> collection_name_list = null;
66 /** If this is true, then there are no OAI enabled collections, so can always return noRecordsMatch (after validating the request params) */
67 protected boolean noRecordsMatch = false;
68
69 /** A set of all known 'sets' */
70 protected HashSet<String> set_set = null;
71
72 protected boolean has_super_colls = false;
73 /** a hash of super set-> collection list */
74 protected HashMap<String, Vector<String>> super_coll_map = null;
75 /** store the super coll elements for convenience */
76 HashMap<String, Element> super_coll_data = null;
77 /** store the metadata formats ??????*/
78 /** The identify response */
79 protected Element identify_response = null;
80 /** The list set response */
81 protected Element listsets_response = null;
82 /** the list metadata formats response */
83 protected Element listmetadataformats_response = null;
84
85 public OAIReceptionist() {
86
87 }
88
89 public void cleanUp() {
90 if (this.mr != null) {
91
92 this.mr.cleanUp();
93 }
94 OAIResumptionToken.saveTokensToFile();
95 }
96
97 public void setSiteName(String site_name) {
98 this.site_name = site_name;
99 }
100 /** sets the message router - it should already be created and
101 * configured in the init() of a servlet (OAIServer, for example) before being passed to the receptionist*/
102 public void setMessageRouter(ModuleInterface mr) {
103 this.mr = mr;
104 }
105
106 /** configures the receptionist */
107 public boolean configure(Element config) {
108
109 if (this.mr==null) {
110 logger.error(" message routers must be set before calling oai configure");
111 return false;
112 }
113 if (config == null) {
114 logger.error(" oai configure file is null");
115 return false;
116 }
117 oai_config = config;
118 resume_after = getResumeAfter();
119
120 repository_id = getRepositoryIdentifier();
121 configureSuperSetInfo();
122 if (!configureSetInfo()) {
123 // there are no sets
124 logger.error("No sets (collections) available for OAI");
125 return false;
126 }
127
128 // load in tokens from OAIResumptionToken.xml, and then clear out any
129 // expired ones.
130 OAIResumptionToken.init();
131 OAIResumptionToken.clearExpiredTokens();
132
133 return true;
134 }
135
136 // assuming that sets are static. If collections change then the servlet
137 // should be restarted.
138 private boolean configureSuperSetInfo() {
139 // do we have any super colls listed in web/WEB-INF/classes/OAIConfig.xml?
140 // Will be like
141 // <oaiSuperSet>
142 // <SetSpec>xxx</SetSpec>
143 // <setName>xxx</SetName>
144 // <SetDescription>xxx</setDescription>
145 // </oaiSuperSet>
146 // The super set is listed in OAIConfig, and collections themselves state
147 // whether they are part of the super set or not.
148 NodeList super_coll_list = this.oai_config.getElementsByTagName(OAIXML.OAI_SUPER_SET);
149 this.super_coll_data = new HashMap<String, Element>();
150 if (super_coll_list.getLength() > 0) {
151 this.has_super_colls = true;
152 for (int i=0; i<super_coll_list.getLength(); i++) {
153 Element super_coll = (Element)super_coll_list.item(i);
154 Element set_spec = (Element)GSXML.getChildByTagName(super_coll, OAIXML.SET_SPEC);
155 if (set_spec != null) {
156 String name = GSXML.getNodeText(set_spec);
157 if (!name.equals("")) {
158 this.super_coll_data.put(name, super_coll);
159 logger.info("adding in super coll "+name);
160 }
161 }
162 }
163
164 if (this.super_coll_data.size()==0) {
165 this.has_super_colls = false;
166 }
167 }
168 if (this.has_super_colls == true) {
169 this.super_coll_map = new HashMap<String, Vector<String>>();
170 }
171 return true;
172
173 }
174 private boolean configureSetInfo() {
175 this.set_set = new HashSet<String>();
176
177 // First, we get a list of all the OAI enabled collections
178 // We get this by sending a listSets request to the MR
179 Document doc = XMLConverter.newDOM();
180 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
181
182 Element request = GSXML.createBasicRequest(doc, OAIXML.OAI_SET_LIST, "", null);
183 message.appendChild(request);
184 Node msg_node = mr.process(message);
185
186 if (msg_node == null) {
187 logger.error("returned msg_node from mr is null");
188 return false;
189 }
190 Element resp = (Element)GSXML.getChildByTagName(msg_node, GSXML.RESPONSE_ELEM);
191 Element coll_list = (Element)GSXML.getChildByTagName(resp, GSXML.COLLECTION_ELEM + GSXML.LIST_MODIFIER);
192 if (coll_list == null) {
193 logger.error("coll_list is null");
194 return false;
195 }
196
197 this.collection_list = (Element)doc.importNode(coll_list, true);
198
199 // go through and store a list of collection names for convenience
200 // also create a 'to' attribute for the next request to the MR, which
201 // is a ListSets request to each collection
202 Node child = this.collection_list.getFirstChild();
203 if (child == null) {
204 logger.error("collection list has no children");
205 noRecordsMatch = true;
206 return false;
207 }
208
209 this.collection_name_list = new Vector<String>();
210 StringBuffer to = new StringBuffer();
211 boolean first = true;
212 while (child != null) {
213 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
214 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
215 this.collection_name_list.add(coll_id);
216 if (!first) {
217 to.append(',');
218 }
219 first = false;
220 to.append(coll_id+"/"+OAIXML.LIST_SETS);
221 }
222 child = child.getNextSibling();
223 }
224 if (first) {
225 // we haven't found any collections
226 logger.error("found no collection elements in collectionList");
227 noRecordsMatch = true;
228 return false;
229 }
230 Document listsets_doc = XMLConverter.newDOM();
231 Element listsets_element = listsets_doc.createElement(OAIXML.LIST_SETS);
232 this.listsets_response = getMessage(listsets_doc, listsets_element);
233
234 // Now, for each collection, get a list of all its sets
235 // might include subsets (classifiers) or super colls
236 // We'll reuse the first message, changing its type and to atts
237 request.setAttribute(GSXML.TYPE_ATT, "");
238 request.setAttribute(GSXML.TO_ATT, to.toString());
239 // send to MR
240 msg_node = mr.process(message);
241 //logger.info("*** " + XMLConverter.getPrettyString(msg_node));
242 NodeList response_list = ((Element)msg_node).getElementsByTagName(GSXML.RESPONSE_ELEM);
243 for (int c=0; c<response_list.getLength(); c++) {
244 // for each collection's response
245 Element response = (Element)response_list.item(c);
246 String coll_name = GSPath.getFirstLink(response.getAttribute(GSXML.FROM_ATT));
247 logger.info("*** coll from response "+coll_name);
248 NodeList set_list = response.getElementsByTagName(OAIXML.SET);
249 for (int j=0; j<set_list.getLength(); j++) {
250 // now check if it a super collection
251 Element set = (Element)set_list.item(j);
252 String set_spec = GSXML.getNodeText((Element)GSXML.getChildByTagName(set, OAIXML.SET_SPEC));
253 logger.info("*** set spec = "+set_spec);
254 // this may change if we add site name back in
255 // setSpecs will be collname or collname:subset or supercollname
256 if (set_spec.indexOf(":")==-1 && ! set_spec.equals(coll_name)) {
257 // it must be a super coll spec
258 logger.info("*** found super coll, "+set_spec);
259 // check that it is a valid one from config
260 if (this.has_super_colls == true && this.super_coll_data.containsKey(set_spec)) {
261 Vector <String> subcolls = this.super_coll_map.get(set_spec);
262 if (subcolls == null) {
263 logger.info("*** its new!!");
264 // not in there yet
265 subcolls = new Vector<String>();
266 this.set_set.add(set_spec);
267 this.super_coll_map.put(set_spec, subcolls);
268 // the first time a supercoll is mentioned, add into the set list
269 logger.info("*** finding the set info "+XMLConverter.getPrettyString(this.super_coll_data.get(set_spec)));
270 listsets_element.appendChild(GSXML.duplicateWithNewName(listsets_doc, this.super_coll_data.get(set_spec), OAIXML.SET, true));
271 }
272 // add this collection to the list for the super coll
273 subcolls.add(coll_name);
274 }
275 } else { // its either the coll itself or a subcoll
276 // add in the set
277 listsets_element.appendChild(listsets_doc.importNode(set, true));
278 this.set_set.add(set_spec);
279 }
280 } // for each set in the collection
281 } // for each OAI enabled collection
282 return true;
283 }
284
285 protected void resetMessageRouter() {
286 // we just need to send a configure request to MR
287 Document doc = XMLConverter.newDOM();
288 Element mr_request_message = doc.createElement(GSXML.MESSAGE_ELEM);
289 Element mr_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_SYSTEM, "", null);
290 mr_request_message.appendChild(mr_request);
291
292 Element system = doc.createElement(GSXML.SYSTEM_ELEM);
293 mr_request.appendChild(system);
294 system.setAttribute(GSXML.TYPE_ATT, GSXML.SYSTEM_TYPE_CONFIGURE);
295
296 Element response = (Element) this.mr.process(mr_request_message);
297 logger.info("*** configure response = "+XMLConverter.getPrettyString(response));
298 }
299 /** process using strings - just calls process using Elements */
300 public String process(String xml_in) {
301
302 Node message_node = XMLConverter.getDOM(xml_in);
303 Node page = process(message_node);
304 return XMLConverter.getString(page);
305 }
306
307 //Compose a message/response element used to send back to the OAIServer servlet.
308 //This method is only used within OAIReceptionist
309 private Element getMessage(Document doc, Element e) {
310 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
311 Element response = doc.createElement(GSXML.RESPONSE_ELEM);
312 msg.appendChild(response);
313 response.appendChild(e);
314 return msg;
315 }
316
317 /** process - produce xml data in response to a request
318 * if something goes wrong, it returns null -
319 */
320 public Node process(Node message_node) {
321 logger.info("*** OAIReceptionist received request");
322
323 Element message = GSXML.nodeToElement(message_node);
324 logger.info("*** " + XMLConverter.getString(message));
325
326 // check that its a correct message tag
327 if (!message.getTagName().equals(GSXML.MESSAGE_ELEM)) {
328 logger.error(" Invalid message. GSDL message should start with <"+GSXML.MESSAGE_ELEM+">, instead it starts with:"+message.getTagName()+".");
329 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
330 }
331
332 // get the request out of the message - assume that there is only one
333 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
334 if (request == null) {
335 logger.error(" message had no request!");
336 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
337 }
338
339 // special case, reset=true for reloading the MR and recept data
340 String reset = request.getAttribute("reset");
341 if (!reset.equals("")) {
342 resetMessageRouter();
343 configureSetInfo();
344 return OAIXML.createResetResponse(true);
345 }
346
347
348 //At this stage, the value of 'to' attribute of the request must be the 'verb'
349 //The only thing that the oai receptionist can be sure is that these verbs are valid, nothing else.
350 String verb = request.getAttribute(GSXML.TO_ATT);
351 if (verb.equals(OAIXML.IDENTIFY)) {
352 return doIdentify();
353 }
354 if (verb.equals(OAIXML.LIST_METADATA_FORMATS)) {
355 return doListMetadataFormats(request);
356 }
357 if (verb.equals(OAIXML.LIST_SETS)) {
358 // we have composed the list sets response on init
359 // Note this means that list sets never uses resumption tokens
360 return this.listsets_response;
361 }
362 if (verb.equals(OAIXML.GET_RECORD)) {
363 return doGetRecord(request);
364 }
365 if (verb.equals(OAIXML.LIST_IDENTIFIERS)) {
366 return doListIdentifiersOrRecords(request,OAIXML.LIST_IDENTIFIERS , OAIXML.HEADER);
367 }
368 if (verb.equals(OAIXML.LIST_RECORDS)) {
369 return doListIdentifiersOrRecords(request, OAIXML.LIST_RECORDS, OAIXML.RECORD);
370 }
371 // should never get here as verbs were checked in OAIServer
372 return OAIXML.createErrorMessage(OAIXML.BAD_VERB, "Unexpected things happened");
373
374 }
375
376
377 private int getResumeAfter() {
378 Element resume_after = (Element)GSXML.getChildByTagName(oai_config, OAIXML.RESUME_AFTER);
379 if(resume_after != null) return Integer.parseInt(GSXML.getNodeText(resume_after));
380 return -1;
381 }
382 private String getRepositoryIdentifier() {
383 Element ri = (Element)GSXML.getChildByTagName(oai_config, OAIXML.REPOSITORY_IDENTIFIER);
384 if (ri != null) {
385 return GSXML.getNodeText(ri);
386 }
387 return "";
388 }
389
390
391 /** if the param_map contains strings other than those in valid_strs, return false;
392 * otherwise true.
393 */
394 private boolean areAllParamsValid(HashMap<String, String> param_map, HashSet<String> valid_strs) {
395 ArrayList<String> param_list = new ArrayList<String>(param_map.keySet());
396 for(int i=0; i<param_list.size(); i++) {
397 logger.info("*** param, key = "+param_list.get(i)+", value = "+param_map.get(param_list.get(i)));
398 if (valid_strs.contains(param_list.get(i)) == false) {
399 return false;
400 }
401 }
402 return true;
403 }
404
405 private Element doListIdentifiersOrRecords(Element req, String verb, String record_type) {
406 // options: from, until, set, metadataPrefix, resumptionToken
407 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
408 HashSet<String> valid_strs = new HashSet<String>();
409 valid_strs.add(OAIXML.FROM);
410 valid_strs.add(OAIXML.UNTIL);
411 valid_strs.add(OAIXML.SET);
412 valid_strs.add(OAIXML.METADATA_PREFIX);
413 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
414
415 Document result_doc = XMLConverter.newDOM();
416 Element result_element = result_doc.createElement(verb);
417 boolean result_token_needed = false; // does this result need to include a
418 // resumption token
419
420 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
421
422 HashMap<String, String> param_map = GSXML.getParamMap(params);
423
424 // are all the params valid?
425 if (!areAllParamsValid(param_map, valid_strs)) {
426 logger.error("One of the params is invalid");
427 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "There was an invalid parameter");
428 // TODO, need to tell the user which one was invalid ??
429 }
430
431 // Do we have a resumption token??
432 String token = null;
433 String from = null;
434 String until = null;
435 boolean set_requested = false;
436 String set_spec_str = null;
437 String prefix_value = null;
438 int cursor = 0;
439 int current_cursor = 0;
440 String current_set = null;
441 long initial_time = 0;
442
443 int total_size = -1; // we are only going to set this in resumption
444 // token if it is easy to work out, i.e. not sending extra requests to
445 // MR just to calculate total size
446
447 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
448 // Is it an error to have other arguments? Do we need to check to make sure that resumptionToken is the only arg??
449 // validate resumptionToken
450 token = param_map.get(OAIXML.RESUMPTION_TOKEN);
451 logger.info("has resumptionToken " + token);
452 if(OAIResumptionToken.isValidToken(token) == false) {
453 logger.error("token is not valid");
454 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "");
455 }
456 result_token_needed = true; // we always need to send a token back if we have started with one. It may be empty if we are returning the end of the list
457 // initialise the request params from the stored token data
458 HashMap<String, String> token_data = OAIResumptionToken.getTokenData(token);
459 from = token_data.get(OAIXML.FROM);
460 until = token_data.get(OAIXML.UNTIL);
461 set_spec_str = token_data.get(OAIXML.SET);
462 if (set_spec_str != null) {
463 set_requested = true;
464 }
465 prefix_value = token_data.get(OAIXML.METADATA_PREFIX);
466 current_set = token_data.get(OAIResumptionToken.CURRENT_SET);
467 try {
468 cursor = Integer.parseInt(token_data.get(OAIXML.CURSOR));
469 cursor = cursor + resume_after; // increment cursor
470 current_cursor = Integer.parseInt(token_data.get(OAIResumptionToken.CURRENT_CURSOR));
471 initial_time = Long.parseLong(token_data.get(OAIResumptionToken.INITIAL_TIME));
472 } catch (NumberFormatException e) {
473 logger.error("tried to parse int from cursor data and failed");
474 }
475
476 // check that the collections/sets haven't changed since the token was issued
477 if (collectionsChangedSinceTime(set_spec_str, initial_time)) {
478 logger.error("one of the collections in set "+set_spec_str+" has changed since token issued. Expiring the token");
479 OAIResumptionToken.expireToken(token);
480 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "Repository data has changed since this token was issued. Resend original request");
481 }
482 }
483 else {
484 // no resumption token, lets check the other params
485 // there must be a metadataPrefix
486 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
487 logger.error("metadataPrefix param required");
488 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "metadataPrefix param required");
489 }
490
491 //if there are any date params, check they're of the right format
492 Date from_date = null;
493 Date until_date = null;
494
495 from = param_map.get(OAIXML.FROM);
496 if(from != null) {
497 from_date = OAIXML.getDate(from);
498 if(from_date == null) {
499 logger.error("invalid date: " + from);
500 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.FROM);
501 }
502 }
503 until = param_map.get(OAIXML.UNTIL);
504 if(until != null) {
505 until_date = OAIXML.getDate(until);
506 if(until_date == null) {
507 logger.error("invalid date: " + until);
508 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.UNTIL);
509 }
510 }
511
512 if(from != null && until != null) { // check they are of the same date-time format (granularity)
513 if(from.length() != until.length()) {
514 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
515 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "The request has different granularities (date-time formats) for the From and Until date parameters.");
516 }
517
518 if(from_date.compareTo(until_date) > 0) { // from date can't be later than until date
519 return OAIXML.createErrorMessage(OAIXML.NO_RECORDS_MATCH, "");
520 }
521 }
522
523 if(until_date != null) {
524
525 // Also call until_date.compareTo(earliestdatestamp) as the until date can't precede the earliest timestamp
526 // Unfortunately, this test has to be done after the granularity test
527 // compareTo() returns the value 0 if the argument Date is equal to this Date; a value less than 0 if this Date is before
528 // the Date argument; and a value greater than 0 if this Date is after the Date argument.
529 long earliestDatestamp = getEarliestDateStamp(collection_list);
530 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
531 Date earliestDatestamp_date = OAIXML.getDate(earliestDatestamp_str);
532
533 if(until_date.compareTo(earliestDatestamp_date) < 0) {
534 return OAIXML.createErrorMessage(OAIXML.NO_RECORDS_MATCH, "");
535 }
536 }
537
538
539 // check the set arg is a set we know about
540 set_requested = param_map.containsKey(OAIXML.SET);
541 set_spec_str = null;
542 if(set_requested == true) {
543 set_spec_str = param_map.get(OAIXML.SET);
544 if (!this.set_set.contains(set_spec_str)) {
545 // the set is not one we know about
546 logger.error("requested set is not found in this repository");
547 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid set parameter");
548
549 }
550 }
551 // Is the metadataPrefix arg one this repository supports?
552 prefix_value = param_map.get(OAIXML.METADATA_PREFIX);
553 if (repositorySupportsMetadataPrefix(prefix_value) == false) {
554 logger.error("requested metadataPrefix is not found in OAIConfig.xml");
555 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "metadata format "+prefix_value+" not supported by this repository");
556 }
557
558 } // else no resumption token, check other params
559
560 // Whew. Now we have validated the params, we can work on doing the actual
561 // request
562
563
564 Document doc = XMLConverter.newDOM();
565 Element mr_msg = doc.createElement(GSXML.MESSAGE_ELEM);
566 Element mr_req = doc.createElement(GSXML.REQUEST_ELEM);
567 // TODO does this need a type???
568 mr_msg.appendChild(mr_req);
569
570 // copy in the from/until params if there
571 if (from != null) {
572 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.FROM, from));
573 }
574 if (until != null) {
575 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.UNTIL, until));
576 }
577 // add metadataPrefix
578 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.METADATA_PREFIX, prefix_value));
579
580 // do we have a set???
581 // if no set, we send to all collections in the collection list
582 // if super set, we send to all collections in super set list
583 // if a single collection, send to it
584 // if a subset, send to the collection
585 Vector<String> current_coll_list = getCollectionListForSet(set_spec_str);
586 boolean single_collection = false;
587 if (current_coll_list.size() == 1) {
588 single_collection = true;
589 }
590 if (set_spec_str != null && set_spec_str.indexOf(":") != -1) {
591 // we have a subset - add the set param back in
592 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.SET, set_spec_str));
593 }
594
595 int num_collected_records = 0;
596 int start_point = current_cursor; // may not be 0 if we are using a resumption token
597 String resumption_collection = "";
598 boolean empty_result_token = false; // if we are sending the last part of a list, then the token value will be empty
599
600 // iterate through the list of collections and send the request to each
601
602 int start_coll=0;
603 if (current_set != null) {
604 // we are resuming a previous request, need to locate the first collection
605 for (int i=0; i<current_coll_list.size(); i++) {
606 if (current_set.equals(current_coll_list.get(i))) {
607 start_coll = i;
608 break;
609 }
610 }
611 }
612
613 for (int i=start_coll; i<current_coll_list.size(); i++) {
614 String current_coll = current_coll_list.get(i);
615 mr_req.setAttribute(GSXML.TO_ATT, current_coll+"/"+verb);
616
617 Element result = (Element)mr.process(mr_msg);
618 logger.info("*** " + verb+ " result for coll "+current_coll);
619 logger.info("*** " + XMLConverter.getPrettyString(result));
620 if (result == null) {
621 logger.info("message router returns null");
622 // do what??? carry on? fail??
623 return OAIXML.createErrorMessage("Internal service returns null", "");
624 }
625 Element res = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
626 if(res == null) {
627 logger.info("response element in xml_result is null");
628 return OAIXML.createErrorMessage("Internal service returns null", "");
629 }
630 NodeList record_list = res.getElementsByTagName(record_type);
631 int num_records = record_list.getLength();
632 if(num_records == 0) {
633 logger.info("message router returns 0 records for coll "+current_coll);
634 continue; // try the next collection
635 }
636 if (single_collection) {
637 total_size = num_records;
638 }
639 int records_to_add = (resume_after > 0 ? resume_after - num_collected_records : num_records);
640 if (records_to_add > (num_records-start_point)) {
641 records_to_add = num_records-start_point;
642 }
643 addRecordsToList(result_doc, result_element, record_list, start_point, records_to_add);
644 num_collected_records += records_to_add;
645
646 // do we need to stop here, and do we need to issue a resumption token?
647 if (resume_after > 0 && num_collected_records == resume_after) {
648 // we have finished collecting records at the moment.
649 // but are we conincidentally at the end? or are there more to go?
650 if (records_to_add < (num_records - start_point)) {
651 // we have added less than this collection had
652 start_point += records_to_add;
653 resumption_collection = current_coll;
654 result_token_needed = true;
655 }
656 else {
657 // we added all this collection had to offer
658 // is there another collection in the list??
659 if (i<current_coll_list.size()-1) {
660 result_token_needed = true;
661 start_point = 0;
662 resumption_collection = current_coll_list.get(i+1);
663 }
664 else {
665 // we have finished one collection and there are no more collection
666 // if we need to send a resumption token (in this case, only because we started with one, then it will be empty
667 logger.info("*** at end of list, need empty result token");
668 empty_result_token = true;
669 }
670 }
671 break;
672 }
673 start_point = 0; // only the first one will have start non-zero, if we
674 // have a resumption token
675
676 } // for each collection
677
678 if (num_collected_records ==0) {
679 // there were no matching results
680 return OAIXML.createErrorMessage(OAIXML.NO_RECORDS_MATCH, "");
681 }
682
683 if (num_collected_records < resume_after) {
684 // we have been through all collections, and there are no more
685 // if we need a result token - only because we started with one, so we need to send an empty one, then make sure everyone knows we are just sending an empty one
686 if (result_token_needed) {
687 empty_result_token = true;
688 }
689 }
690
691 if (result_token_needed) {
692 // we need a resumption token
693 if (empty_result_token) {
694 logger.info("*** have empty result token");
695 token = "";
696 } else {
697 if (token != null) {
698 // we had a token for this request, we can just update it
699 token = OAIResumptionToken.updateToken(token, ""+cursor, resumption_collection, ""+start_point);
700 } else {
701 // we are generating a new one
702 token = OAIResumptionToken.createAndStoreResumptionToken(set_spec_str, prefix_value, from, until, ""+cursor, resumption_collection, ""+start_point );
703 }
704 }
705
706 // result token XML
707 long expiration_date = -1;
708 if (empty_result_token) {
709 // we know how many records in total as we have sent them all
710 total_size = cursor+num_collected_records;
711 } else {
712 // non-empty token, set the expiration date
713 expiration_date = OAIResumptionToken.getExpirationDate(token);
714 }
715 Element token_elem = OAIXML.createResumptionTokenElement(result_doc, token, total_size, cursor, expiration_date);
716 // OAIXML.addToken(token_elem); // store it
717 result_element.appendChild(token_elem); // add to the result
718 }
719
720
721 return getMessage(result_doc, result_element);
722 }
723
724 private Vector<String> getCollectionListForSet(String set) {
725 if (set == null) {
726 // no set requested, need the complete collection list
727 return this.collection_name_list;
728 }
729 if (has_super_colls && super_coll_map.containsKey(set)) {
730 return super_coll_map.get(set);
731 }
732
733 Vector<String> coll_list = new Vector<String>();
734 if (set.indexOf(":") != -1) {
735 String col_name = set.substring(0, set.indexOf(":"));
736 coll_list.add(col_name);
737 }
738 else {
739 coll_list.add(set);
740 }
741 return coll_list;
742 }
743 private void addRecordsToList(Document doc, Element result_element, NodeList
744 record_list, int start_point, int num_records) {
745 int end_point = start_point + num_records;
746 for (int i=start_point; i<end_point; i++) {
747 result_element.appendChild(doc.importNode(record_list.item(i), true));
748 }
749 }
750
751 private Element collectAll(Element result, Element msg, String verb, String elem_name) {
752 if(result == null) {
753 //in the first round, result is null
754 return msg;
755 }
756 Element res_in_result = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
757 if(res_in_result == null) { // return the results of all other collections accumulated so far
758 return msg;
759 }
760 Element verb_elem = (Element)GSXML.getChildByTagName(res_in_result, verb);
761 if(msg == null) {
762 return result;
763 }
764
765 //e.g., get all <record> elements from the returned message. There may be none of
766 //such element, for example, the collection service returned an error message
767 NodeList elem_list = msg.getElementsByTagName(elem_name);
768
769 for (int i=0; i<elem_list.getLength(); i++) {
770 verb_elem.appendChild(res_in_result.getOwnerDocument().importNode(elem_list.item(i), true));
771 }
772 return result;
773 }
774
775
776 /** there are three possible exception conditions: bad argument, idDoesNotExist, and noMetadataFormat.
777 * The first one is handled here, and the last two are processed by OAIPMH.
778 */
779 private Element doListMetadataFormats(Element req) {
780 //if the verb is ListMetadataFormats, there could be only one parameter: identifier
781 //, or there is no parameter; otherwise it is an error
782 //logger.info("" + XMLConverter.getString(msg));
783
784 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
785 Element param = null;
786 Document lmf_doc = XMLConverter.newDOM();
787 if(params.getLength() == 0) {
788 //this is requesting metadata formats for the whole repository
789 //read the oaiConfig.xml file, return the metadata formats specified there.
790 if (this.listmetadataformats_response != null) {
791 // we have already created it
792 return this.listmetadataformats_response;
793 }
794
795 Element list_metadata_formats = lmf_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
796 // get all the formats out of oai_config
797 NodeList formats = oai_config.getElementsByTagName(OAIXML.METADATA_FORMAT);
798 if (formats.getLength() ==0) {
799 logger.error("OAIConfig.xml must contain the supported metadata formats");
800 // TODO this is internal error, what to do???
801 return getMessage(lmf_doc, list_metadata_formats);
802 }
803
804 for(int i=0; i<formats.getLength(); i++) {
805 Element f = OAIXML.getMetadataFormatShort(lmf_doc, (Element)formats.item(i));
806 list_metadata_formats.appendChild(f);
807 }
808 this.listmetadataformats_response = getMessage(lmf_doc, list_metadata_formats);
809 return this.listmetadataformats_response;
810
811 }
812
813 if (params.getLength() > 1) {
814 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
815 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
816 }
817
818 // This is a request for the metadata of a particular item with an identifier
819 /**the request xml is in the form: <request>
820 * <param name=.../>
821 * </request>
822 *And there is a param element and one element only. (No paramList element in between).
823 */
824 param = (Element)params.item(0);
825 String param_name = param.getAttribute(GSXML.NAME_ATT);
826 String identifier = "";
827 if (!param_name.equals(OAIXML.IDENTIFIER)) {
828 //Bad argument
829 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
830 }
831
832 identifier = param.getAttribute(GSXML.VALUE_ATT);
833 // the identifier is in the form: <coll_name>:<OID>
834 // so it must contain at least one ':' characters
835 // (the oid itself may contain : chars)
836 String[] strs = identifier.split(":", 2);
837 if(strs.length != 2) {
838 logger.error("identifier is not in the form coll:id" + identifier);
839 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
840 }
841
842 // send request to message router
843 // get the names
844 String coll_name = strs[0];
845 String oid = strs[1];
846
847 Document msg_doc = XMLConverter.newDOM();
848 Element message = msg_doc.createElement(GSXML.MESSAGE_ELEM);
849 String verb = req.getAttribute(GSXML.TO_ATT);
850 String new_to = coll_name + "/" + verb;
851 Element request = GSXML.createBasicRequest(msg_doc, "oai???", new_to, null);
852 message.appendChild(request);
853 // add the id param
854 GSXML.addParameterToList(request, OAIXML.OID, oid);
855
856 //Now send the request to the message router to process
857 Node result_node = mr.process(message);
858 return GSXML.nodeToElement(result_node);
859 }
860
861 private void copyNamedElementfromConfig(Element to_elem, String element_name) {
862 Element original_element = (Element)GSXML.getChildByTagName(oai_config, element_name);
863 if(original_element != null) {
864 GSXML.copyNode(to_elem, original_element);
865 }
866 }
867
868
869 private Element doIdentify() {
870 //The validation for this verb has been done in OAIServer.validate(). So no bother here.
871 logger.info("");
872 if (this.identify_response != null) {
873 // we have already created it
874 return getMessage(this.identify_response.getOwnerDocument(), this.identify_response);
875 }
876 Document doc = XMLConverter.newDOM();
877 Element identify = doc.createElement(OAIXML.IDENTIFY);
878 //do the repository name
879 copyNamedElementfromConfig(identify, OAIXML.REPOSITORY_NAME);
880 //do the baseurl
881 copyNamedElementfromConfig(identify, OAIXML.BASE_URL);
882 //do the protocol version
883 copyNamedElementfromConfig(identify, OAIXML.PROTOCOL_VERSION);
884
885 //There can be more than one admin email according to the OAI specification
886 NodeList admin_emails = GSXML.getChildrenByTagName(oai_config, OAIXML.ADMIN_EMAIL);
887 int num_admin = 0;
888 Element from_admin_email = null;
889 if (admin_emails != null) {
890 num_admin = admin_emails.getLength();
891 }
892 for (int i=0; i<num_admin; i++) {
893 GSXML.copyNode(identify, admin_emails.item(i));
894 }
895
896 //do the earliestDatestamp
897 //send request to mr to search through the earliest datestamp amongst all oai collections in the repository.
898 //ask the message router for a list of oai collections
899 //NodeList oai_coll = getOAICollectionList();
900 long earliestDatestamp = getEarliestDateStamp(collection_list);
901 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
902 Element earliestDatestamp_elem = doc.createElement(OAIXML.EARLIEST_DATESTAMP);
903 GSXML.setNodeText(earliestDatestamp_elem, earliestDatestamp_str);
904 identify.appendChild(earliestDatestamp_elem);
905
906 //do the deletedRecord
907 copyNamedElementfromConfig(identify, OAIXML.DELETED_RECORD);
908 //do the granularity
909 copyNamedElementfromConfig(identify, OAIXML.GRANULARITY);
910
911 // output the oai identifier
912 Element description = doc.createElement(OAIXML.DESCRIPTION);
913 identify.appendChild(description);
914 // TODO, make this a valid id
915 Element oaiIdentifier = OAIXML.createOAIIdentifierXML(doc, repository_id, "lucene-jdbm-demo", "ec159e");
916 description.appendChild(oaiIdentifier);
917
918 // if there are any oaiInfo metadata, add them in too.
919 Element info = (Element)GSXML.getChildByTagName(oai_config, OAIXML.OAI_INFO);
920 if (info != null) {
921 NodeList meta = GSXML.getChildrenByTagName(info, OAIXML.METADATA);
922 if (meta != null && meta.getLength() > 0) {
923 Element gsdl = OAIXML.createGSDLElement(doc);
924 description.appendChild(gsdl);
925 for (int m = 0; m<meta.getLength(); m++) {
926 GSXML.copyNode(gsdl, meta.item(m));
927 }
928
929 }
930 }
931 this.identify_response = identify;
932 return getMessage(doc, identify);
933 }
934 /** split the identifier into <collection + OID> as an array
935 It has already been checked that the 'identifier' contains at least one ':'
936 */
937
938 /** validate if the specified metadata prefix value is supported by the repository
939 * by checking it in the OAIConfig.xml
940 */
941 private boolean repositorySupportsMetadataPrefix(String prefix_value) {
942 NodeList prefix_list = oai_config.getElementsByTagName(OAIXML.METADATA_PREFIX);
943
944 for(int i=0; i<prefix_list.getLength(); i++) {
945 if(prefix_value.equals(GSXML.getNodeText((Element)prefix_list.item(i)).trim() )) {
946 return true;
947 }
948 }
949 return false;
950 }
951 private Element doGetRecord(Element req){
952 logger.info("");
953 /** arguments:
954 identifier: required
955 metadataPrefix: required
956 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
957 */
958 Document doc = XMLConverter.newDOM();
959 Element get_record = doc.createElement(OAIXML.GET_RECORD);
960
961 HashSet<String> valid_strs = new HashSet<String>();
962 valid_strs.add(OAIXML.IDENTIFIER);
963 valid_strs.add(OAIXML.METADATA_PREFIX);
964
965 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
966 HashMap<String, String> param_map = GSXML.getParamMap(params);
967
968 if(!areAllParamsValid(param_map, valid_strs) ||
969 params.getLength() == 0 ||
970 param_map.containsKey(OAIXML.IDENTIFIER) == false ||
971 param_map.containsKey(OAIXML.METADATA_PREFIX) == false ) {
972 logger.error("must have the metadataPrefix/identifier parameter.");
973 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
974 }
975
976 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
977 String identifier = param_map.get(OAIXML.IDENTIFIER);
978
979 // verify the metadata prefix
980 if (repositorySupportsMetadataPrefix(prefix) == false) {
981 logger.error("requested prefix is not found in OAIConfig.xml");
982 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
983 }
984
985 // get the names
986 String[] strs = identifier.split(":", 2);
987 if(strs == null || strs.length < 2) {
988 logger.error("identifier is not in the form coll:id" + identifier);
989 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
990 }
991 //String name_of_site = strs[0];
992 String coll_name = strs[0];
993 String oid = strs[1];
994
995 //re-organize the request element
996 // reset the 'to' attribute
997 String verb = req.getAttribute(GSXML.TO_ATT);
998 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
999 // reset the identifier element
1000 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.IDENTIFIER);
1001 if (param != null) {
1002 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
1003 param.setAttribute(GSXML.VALUE_ATT, oid);
1004 }
1005
1006 //Now send the request to the message router to process
1007 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
1008 msg.appendChild(doc.importNode(req, true));
1009 Node result_node = mr.process(msg);
1010 return GSXML.nodeToElement(result_node);
1011 }
1012
1013 // See OAIConfig.xml
1014 // dynamically works out what the earliestDateStamp is, since it varies by collection
1015 // returns this time in *milliseconds*.
1016 protected long getEarliestDateStamp(Element oai_coll_list) {
1017 // config earliest datstamp
1018 long config_datestamp = 0;
1019 Element config_datestamp_elem = (Element)GSXML.getChildByTagName(this.oai_config, OAIXML.EARLIEST_DATESTAMP);
1020 if (config_datestamp_elem != null) {
1021 String datest = GSXML.getNodeText(config_datestamp_elem);
1022 config_datestamp = OAIXML.getTime(datest);
1023 if (config_datestamp == -1) {
1024 config_datestamp = 0;
1025 }
1026 }
1027 //do the earliestDatestamp
1028 long current_time = System.currentTimeMillis();
1029 long earliestDatestamp = current_time;
1030 NodeList oai_coll = oai_coll_list.getElementsByTagName(GSXML.COLLECTION_ELEM);
1031 int oai_coll_size = oai_coll.getLength();
1032 if (oai_coll_size == 0) {
1033 logger.info("returned oai collection list is empty. Setting repository earliestDatestamp to be the earliest datestamp from OAIConfig.xml, or 1970-01-01 if not specified.");
1034 return config_datestamp;
1035 }
1036 // the earliestDatestamp is now stored as a metadata element in the collection's buildConfig.xml file
1037 // we get the earliestDatestamp among the collections
1038 for(int i=0; i<oai_coll_size; i++) {
1039 String collName = collection_name_list.get(i);
1040 long coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.EARLIEST_DATESTAMP)); // Taken from oai-inf db's OAI_EARLIEST_TIMESTAMP_OID entry, else falls back to earliest datestamp field in buildcfg
1041 if (coll_earliestDatestamp == 0) {
1042 // try last modified
1043 coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.LAST_MODIFIED));
1044 //logger.info("@@@ Falling back to using collection " + collName + "'s lastmodified date as its earliest timestamp: " + coll_earliestDatestamp);
1045 }
1046 if (coll_earliestDatestamp > 0) {
1047 earliestDatestamp = (earliestDatestamp > coll_earliestDatestamp)? coll_earliestDatestamp : earliestDatestamp;
1048 }
1049 }
1050 if (earliestDatestamp == current_time) {
1051 logger.info("no collection had a real datestamp, using value from OAIConfig");
1052 return config_datestamp;
1053 }
1054 return earliestDatestamp;
1055 }
1056
1057 private boolean collectionsChangedSinceTime(String set_spec_str, long initial_time) {
1058
1059 // we need to look though all collections in the set to see if any have last modified dates > initial_time
1060 Vector<String> set_coll_list = getCollectionListForSet(set_spec_str);
1061
1062 Node child = this.collection_list.getFirstChild();
1063 while (child != null) {
1064 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
1065 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
1066 if (set_coll_list.contains(coll_id)) {
1067 long last_modified = Long.parseLong(((Element)child).getAttribute(OAIXML.LAST_MODIFIED));
1068 if (initial_time < last_modified) {
1069 return true;
1070 }
1071 }
1072 }
1073 child = child.getNextSibling();
1074 }
1075 return false;
1076
1077 }
1078
1079}
1080
1081
Note: See TracBrowser for help on using the repository browser.