source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/core/OAIReceptionist.java@ 24440

Last change on this file since 24440 was 24440, checked in by ak19, 13 years ago

GS2 failed the new OAI validation test at openarchives.org. The code failed on a ListRecords request with a resumptionToken: since no metadataPrefix was specified, the code was set to fail. Now the metadataPrefix is appended to the end of the resumptionToken, so that a follow-up ListRecords request can work out what the metadataPrefix was from the resumptionToken.

File size: 45.8 KB
Line 
1package org.greenstone.gsdl3.core;
2
3import org.greenstone.gsdl3.util.*;
4import org.greenstone.gsdl3.action.*;
5// XML classes
6import org.w3c.dom.Node;
7import org.w3c.dom.NodeList;
8import org.w3c.dom.Document;
9import org.w3c.dom.Element;
10
11// other java classes
12import java.io.File;
13import java.util.*;
14
15import org.apache.log4j.*;
16
17/** a Receptionist, used for oai metadata response xml generation.
18 * This receptionist talks to the message router directly,
19 * instead of via any action, hence no action map is needed.
20 * @see the basic Receptionist
21 */
22public class OAIReceptionist implements ModuleInterface {
23
24 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.core.OAIReceptionist.class.getName());
25
26 /** Instead of a config_params object, only a site_name is needed by oai receptionist. */
27 protected String site_name = null;
28 /** container Document to create XML Nodes for requests sent to message router
29 * Not used for response
30 */
31 protected Document doc=null;
32
33 /** a converter class to parse XML and create Docs */
34 protected XMLConverter converter=null;
35
36 /** the configure file of this receptionist passed from the oai servlet. */
37 protected Element oai_config = null;
38
39 /** contained in the OAIConfig.xml deciding whether the resumptionToken should be in use */
40 protected int resume_after = -1 ;
41
42 /** the message router that the Receptionist and Actions will talk to */
43 protected ModuleInterface mr = null;
44
/**
 * Creates the receptionist together with its XML converter and the DOM
 * document that later serves as a factory for request nodes.
 */
public OAIReceptionist() {
    XMLConverter xml_converter = new XMLConverter();
    this.converter = xml_converter;
    this.doc = xml_converter.newDOM();
}
50
51 public void cleanUp() {}
52
53 public void setSiteName(String site_name) {
54 this.site_name = site_name;
55 }
56 /** sets the message router - it should already be created and
57 * configured in the init() of a servlet (OAIServer, for example) before being passed to the receptionist*/
58 public void setMessageRouter(ModuleInterface mr) {
59 this.mr = mr;
60 }
61
62 /** configures the receptionist */
63 public boolean configure(Element config) {
64
65 if (this.mr==null) {
66 logger.error(" message routers must be set before calling oai configure");
67 return false;
68 }
69 if (config == null) {
70 logger.error(" oai configure file is null");
71 return false;
72 }
73 oai_config = config;
74 resume_after = getResumeAfter();
75
76 //clear out expired resumption tokens stored in OAIResumptionToken.xml
77 OAIXML.init();
78 OAIXML.clearExpiredTokens();
79
80 return true;
81 }
82 /** process using strings - just calls process using Elements */
83 public String process(String xml_in) {
84
85 Node message_node = this.converter.getDOM(xml_in);
86 Node page = process(message_node);
87 return this.converter.getString(page);
88 }
89
90 //Compose a message element used to send back to the OAIServer servlet.
91 //This method is only used within OAIReceptionist
92 private Element getMessage(Element e) {
93 Element msg = OAIXML.createElement(OAIXML.MESSAGE);
94 msg.appendChild(OAIXML.getResponse(e));
95 return msg;
96 }
97 /** process - produce xml data in response to a request
98 * if something goes wrong, it returns null -
99 */
100 public Node process(Node message_node) {
101 logger.error("OAIReceptionist received request");
102
103 Element message = this.converter.nodeToElement(message_node);
104 logger.error(this.converter.getString(message));
105
106 // check that its a correct message tag
107 if (!message.getTagName().equals(GSXML.MESSAGE_ELEM)) {
108 logger.error(" Invalid message. GSDL message should start with <"+GSXML.MESSAGE_ELEM+">, instead it starts with:"+message.getTagName()+".");
109 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
110 }
111
112 // get the request out of the message - assume that there is only one
113 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
114 if (request == null) {
115 logger.error(" message had no request!");
116 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
117 }
118 //At this stage, the value of 'to' attribute of the request must be the 'verb'
119 //The only thing that the oai receptionist can be sure is that these verbs are valid, nothing else.
120 String verb = request.getAttribute(GSXML.TO_ATT);
121 if (verb.equals(OAIXML.IDENTIFY)) {
122 return doIdentify();
123 }
124 if (verb.equals(OAIXML.LIST_METADATA_FORMATS)) {
125 return doListMetadataFormats(message);
126 }
127 if (verb.equals(OAIXML.LIST_SETS)) {
128 return doListSets(message);
129 }
130 if (verb.equals(OAIXML.GET_RECORD)) {
131 return doGetRecord(message);
132 }
133 if (verb.equals(OAIXML.LIST_IDENTIFIERS)) {
134 return doListIdentifiers(message);
135 }
136 if (verb.equals(OAIXML.LIST_RECORDS)) {
137 return doListRecords(message);
138 }
139 return getMessage(OAIXML.createErrorElement("Unexpected things happened", ""));
140
141 }
142 /** send a request to the message router asking for a list of collections that support oai
143 * The type attribute must be changed from 'oaiService' to 'oaiSetList'
144 */
145 private NodeList getOAICollectionList() {
146 Element message = this.doc.createElement(OAIXML.MESSAGE);
147 Element request = this.doc.createElement(OAIXML.REQUEST);
148 message.appendChild(request);
149 request.setAttribute(OAIXML.TYPE, OAIXML.OAI_SET_LIST);
150 request.setAttribute(OAIXML.TO, "");
151 Node msg_node = mr.process(message);
152
153 if (msg_node == null) {
154 logger.error("returned msg_node from mr is null");
155 return null;
156 }
157 Element resp = (Element)GSXML.getChildByTagName(msg_node, OAIXML.RESPONSE);
158 Element coll_list = (Element)GSXML.getChildByTagName(resp, OAIXML.COLLECTION_LIST);
159 if (coll_list == null) {
160 logger.error("coll_list is null");
161 return null;
162 }
163 //logger.info(GSXML.xmlNodeToString(coll_list));
164 NodeList list = coll_list.getElementsByTagName(OAIXML.COLLECTION);
165 int length = list.getLength();
166 if (length == 0) {
167 logger.error("length is 0");
168 return null;
169 }
170 return list;
171 }
172 /**Exclusively called by doListSets()*/
173 private void getSets(Element list_sets_elem, NodeList oai_coll, int start_point, int end_point) {
174 for (int i=start_point; i<end_point; i++) {
175 String coll_spec = ((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME);
176 String coll_name = coll_spec.substring(coll_spec.indexOf(":") + 1);
177 Element set = OAIXML.createElement(OAIXML.SET);
178 Element set_spec = OAIXML.createElement(OAIXML.SET_SPEC);
179 GSXML.setNodeText(set_spec, coll_spec);
180 set.appendChild(set_spec);
181 Element set_name = OAIXML.createElement(OAIXML.SET_NAME);
182 GSXML.setNodeText(set_name, coll_name);
183 set.appendChild(set_name);
184 list_sets_elem.appendChild(set);
185 }
186 }
187 private int getResumeAfter() {
188 Element resume_after = (Element)GSXML.getChildByTagName(oai_config, OAIXML.RESUME_AFTER);
189 if(resume_after != null) return Integer.parseInt(GSXML.getNodeText(resume_after));
190 return -1;
191 }
192 /** method to compose a set element
193 */
194 private Element doListSets(Element msg){
195 logger.info("");
196 // option: resumptionToken
197 // exceptions: badArgument, badResumptionToken, noSetHierarchy
198 Element list_sets_elem = OAIXML.createElement(OAIXML.LIST_SETS);
199
200 //ask the message router for a list of oai collections
201 NodeList oai_coll = getOAICollectionList();
202 int oai_coll_size = oai_coll.getLength();
203 if (oai_coll_size == 0) {
204 return getMessage(list_sets_elem);
205 }
206
207 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
208 if (req == null) {
209 logger.error("req is null");
210 return null;
211 }
212 //params list only contains the parameters other than the verb
213 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
214 Element param = null;
215 int smaller = (oai_coll_size>resume_after)? resume_after : oai_coll_size;
216 if (params.getLength() > 1) {
217 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
218 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
219 }
220 if(params.getLength() == 0) {
221 //this is requesting a list of sets in the whole repository
222 /** there is no resumeptionToken in the request, we check whether we need
223 * to send out resumeptionToken by comparing the total number of sets in this
224 * repository and the specified value of resumeAfter
225 */
226 if(resume_after < 0 || oai_coll_size <= resume_after) {
227 //send the whole list of records
228 //all data are sent on the first request. Therefore there should be
229 //no resumeptionToken stored in OAIConfig.xml.
230 //As long as the verb is 'ListSets', we ignore the rest of the parameters
231 getSets(list_sets_elem, oai_coll, 0, oai_coll_size);
232 return getMessage(list_sets_elem);
233 }
234
235 //append required sets to list_sets_elem (may be a complete or incomplete list)
236 getSets(list_sets_elem, oai_coll, 0, smaller);
237
238 if(oai_coll_size > resume_after) {
239 //An incomplete list is sent; append a resumptionToken element
240 Element token = createResumptionTokenElement(oai_coll_size, 0, resume_after, true);
241 //store this token
242 OAIXML.addToken(token);
243
244 list_sets_elem.appendChild(token);
245 }
246
247 return getMessage(list_sets_elem);
248 }
249
250 // The url should contain only one param called resumptionToken
251 // This is requesting a subsequent part of a list, following a previously sent incomplete list
252 param = (Element)params.item(0);
253 String param_name = param.getAttribute(OAIXML.NAME);
254 if (!param_name.equals(OAIXML.RESUMPTION_TOKEN)) {
255 //Bad argument
256 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
257 }
258 //get the token
259 String token = param.getAttribute(OAIXML.VALUE);
260 //validate the token string (the string has already been decoded in OAIServer, e.g.,
261 // replace %3A with ':')
262 if(OAIXML.containsToken(token) == false) {
263 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_RESUMPTION_TOKEN, ""));
264 }
265 //take out the cursor value, which is the size of previously sent list
266 int index = token.indexOf(":");
267 int cursor = Integer.parseInt(token.substring(index + 1));
268 Element token_elem = null;
269
270 // are we sending the final part of a complete list?
271 if(cursor + resume_after >= oai_coll_size) {
272 //Yes, we are.
273 //append required sets to list_sets_elem (list is complete)
274 getSets(list_sets_elem, oai_coll, cursor, oai_coll_size);
275 //An incomplete list is sent; append a resumptionToken element
276 token_elem = createResumptionTokenElement(oai_coll_size, cursor, -1, false);
277 list_sets_elem.appendChild(token_elem);
278 } else {
279 //No, we are not.
280 //append required sets to list_sets_elem (list is incomplete)
281 getSets(list_sets_elem, oai_coll, cursor, cursor + resume_after);
282 token_elem = createResumptionTokenElement(oai_coll_size, cursor, cursor + resume_after, true);
283 //store this token
284 OAIXML.addToken(token_elem);
285 list_sets_elem.appendChild(token_elem);
286 }
287 return getMessage(list_sets_elem);
288 }
289 private Element createResumptionTokenElement(int total_size, int cursor, int so_far_sent, boolean set_expiration, String metadata_prefix) {
290 Element token = OAIXML.createElement(OAIXML.RESUMPTION_TOKEN);
291 token.setAttribute(OAIXML.COMPLETE_LIST_SIZE, "" + total_size);
292 token.setAttribute(OAIXML.CURSOR, "" + cursor);
293
294 if(set_expiration) {
295 /** read the resumptionTokenExpiration element in OAIConfig.xml and get the specified time value
296 * Use the time value plus the current system time to get the expiration date string.
297 */
298 String expiration_date = OAIXML.getTime(System.currentTimeMillis() + OAIXML.getTokenExpiration()); // in milliseconds
299 token.setAttribute(OAIXML.EXPIRATION_DATE, expiration_date);
300 }
301
302 if(so_far_sent > 0) {
303 //the format of resumptionToken is not defined by the OAI-PMH and should be
304 //considered opaque by the harvester (in other words, strictly follow what the
305 //data provider has to offer
306 //Here, we make use of the uniqueness of the system time
307 String tokenValue = OAIXML.GS3OAI + System.currentTimeMillis() + ":" + so_far_sent;
308 if(!metadata_prefix.equals("")) {
309 tokenValue = tokenValue + ":" + metadata_prefix;
310 }
311 GSXML.setNodeText(token, tokenValue);
312 }
313 return token;
314 }
315
316 private Element createResumptionTokenElement(int total_size, int cursor, int so_far_sent, boolean set_expiration) {
317 return createResumptionTokenElement(total_size, cursor, so_far_sent, set_expiration, ""); // empty metadata_prefix
318 }
319
320 /** if the param_map contains strings other than those in valid_strs, return false;
321 * otherwise true.
322 */
323 private boolean isValidParam(HashMap param_map, HashSet valid_strs) {
324 ArrayList param_list = new ArrayList(param_map.keySet());
325 for(int i=0; i<param_list.size(); i++) {
326 if (valid_strs.contains((String)param_list.get(i)) == false) {
327 return false;
328 }
329 }
330 return true;
331 }
332 private Element doListIdentifiers(Element msg) {
333 // option: from, until, set, metadataPrefix, resumptionToken
334 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
335 HashSet valid_strs = new HashSet();
336 valid_strs.add(OAIXML.FROM);
337 valid_strs.add(OAIXML.UNTIL);
338 valid_strs.add(OAIXML.SET);
339 valid_strs.add(OAIXML.METADATA_PREFIX);
340 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
341
342 Element list_identifiers = OAIXML.createElement(OAIXML.LIST_IDENTIFIERS);
343 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
344 if (req == null) { logger.error("req is null"); return null; }
345 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
346 String coll_name = "";
347 String token = "";
348
349 HashMap param_map = OAIXML.getParamMap(params);
350 if (!isValidParam(param_map, valid_strs)) {
351 logger.error("One of the params is invalid");
352 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
353 }
354 // param keys are valid, but if there are any date params, check they're of the right format
355 String from = (String)param_map.get(OAIXML.FROM);
356 if(from != null) {
357 Date from_date = OAIXML.getDate(from);
358 if(from_date == null) {
359 logger.error("invalid date: " + from);
360 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
361 }
362 }
363 String until = (String)param_map.get(OAIXML.UNTIL);
364 if(until != null) {
365 Date until_date = OAIXML.getDate(until);
366 if(until_date == null) {
367 logger.error("invalid date: " + until);
368 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
369 }
370 }
371 if(from != null && until != null) { // check they are of the same date-time format (granularity)
372 if(from.length() != until.length()) {
373 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
374 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
375 }
376 }
377
378 //ask the message router for a list of oai collections
379 NodeList oai_coll = getOAICollectionList();
380 int oai_coll_size = oai_coll.getLength();
381 if (oai_coll_size == 0) {
382 logger.info("returned oai collection list is empty");
383 return getMessage(OAIXML.createErrorElement(OAIXML.NO_RECORDS_MATCH, ""));
384 }
385
386 //Now we check if the optional argument 'set' has been specified in the params; if so,
387 //whether the specified setSpec is supported by this repository
388 boolean request_set = param_map.containsKey(OAIXML.SET);
389 if(request_set == true) {
390 boolean set_supported = false;
391 String set_spec_str = (String)param_map.get(OAIXML.SET);
392 // get the collection name
393 //if setSpec is supported by this repository, it must be in the form: site_name:coll_name
394 String[] strs = splitSetSpec(set_spec_str);
395 coll_name = strs[1];
396
397 for(int i=0; i<oai_coll_size; i++) {
398 if(set_spec_str.equals(((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME))) {
399 set_supported = true;
400 }
401 }
402 if(set_supported == false) {
403 logger.error("requested set is not found in this repository");
404 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
405 }
406 }
407
408 //Is there a resumptionToken included which is requesting an incomplete list?
409 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
410 // validate resumptionToken
411 token = (String)param_map.get(OAIXML.RESUMPTION_TOKEN);
412 logger.info("has resumptionToken" + token);
413 if(OAIXML.containsToken(token) == false) {
414 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_RESUMPTION_TOKEN, ""));
415 }
416 }
417
418 // Custom test that expects a metadataPrefix comes here at end so that the official params can
419 // be tested first for errors and their error responses sent off. Required for OAI validation
420 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
421 logger.error("contains invalid params or no metadataPrefix");
422 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
423 }
424
425 //Now that we got a prefix, check and see if it's supported by this repository
426 String prefix_value = (String)param_map.get(OAIXML.METADATA_PREFIX);
427 if (containsMetadataPrefix(prefix_value) == false) {
428 logger.error("requested prefix is not found in OAIConfig.xml");
429 return getMessage(OAIXML.createErrorElement(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""));
430 }
431
432 //Now that all validation has been done, I hope, we can send request to the message router
433 Element result = null;
434 String verb = req.getAttribute(OAIXML.TO);
435 NodeList param_list = req.getElementsByTagName(OAIXML.PARAM);
436 ArrayList retain_param_list = new ArrayList();
437 for (int j=0; j<param_list.getLength(); j++) {
438 Element e = OAIXML.duplicateElement(msg.getOwnerDocument(), (Element)param_list.item(j), true);
439 retain_param_list.add(e);
440 }
441
442 //re-organize the request element
443 // reset the 'to' attribute
444 if (request_set == false) {
445 logger.info("requesting identifiers of all collections");
446 for(int i=0; i<oai_coll_size; i++) {
447 if(req == null) {
448 req = msg.getOwnerDocument().createElement(GSXML.REQUEST_ELEM);
449 msg.appendChild(req);
450 for (int j=0; j<retain_param_list.size(); j++) {
451 req.appendChild((Element)retain_param_list.get(j));
452 }
453 }
454 String full_name = ((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME);
455 coll_name = full_name.substring(full_name.indexOf(":") + 1);
456 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
457 Node n = mr.process(msg);
458 Element e = converter.nodeToElement(n);
459 result = collectAll(result, e, verb, OAIXML.HEADER);
460
461 //clear the content of the old request element
462 msg.removeChild(req);
463 req = null;
464 }
465 } else {
466 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
467 Node result_node = mr.process(msg);
468 result = converter.nodeToElement(result_node);
469 }
470
471 if (result == null) {
472 logger.info("message router returns null");
473 return getMessage(OAIXML.createErrorElement("Internal service returns null", ""));
474 }
475 Element res = (Element)GSXML.getChildByTagName(result, OAIXML.RESPONSE);
476 if(res == null) {
477 logger.info("response element in xml_result is null");
478 return getMessage(OAIXML.createErrorElement("Internal service returns null", ""));
479 }
480 NodeList header_list = res.getElementsByTagName(OAIXML.HEADER);
481 int num_headers = header_list.getLength();
482 if(num_headers == 0) {
483 logger.info("message router returns 0 headers.");
484 return getMessage(OAIXML.createErrorElement(OAIXML.NO_RECORDS_MATCH, ""));
485 }
486
487 //The request coming in does not contain a token, but we have to check the resume_after value and see if we need to issue a resumption token and
488 // save the token as well.
489 if (token.equals("") == true) {
490 if(resume_after < 0 || num_headers <= resume_after) {
491 //send the whole list of records
492 return result;
493 }
494
495 //append required number of records (may be a complete or incomplete list)
496 getRecords(list_identifiers, header_list, 0, resume_after);
497 //An incomplete list is sent; append a resumptionToken element
498 Element token_elem = createResumptionTokenElement(num_headers, 0, resume_after, true);
499 //store this token
500 OAIXML.addToken(token_elem);
501
502 list_identifiers.appendChild(token_elem);
503 return getMessage(list_identifiers);
504 }
505
506 if (token.equals("") == false) {
507 //get an appropriate number of records (partial list) according to the token
508 //take out the cursor value, which is the size of previously sent list
509 int index = token.indexOf(":");
510 int cursor = Integer.parseInt(token.substring(index + 1));
511 Element token_elem = null;
512
513 // are we sending the final part of a complete list?
514 if(cursor + resume_after >= num_headers) {
515 //Yes, we are.
516 //append required records to list_records (list is complete)
517 getRecords(list_identifiers, header_list, cursor, num_headers);
518 //An incomplete list is sent; append a resumptionToken element
519 token_elem = createResumptionTokenElement(num_headers, cursor, -1, false);
520 list_identifiers.appendChild(token_elem);
521 } else {
522 //No, we are not.
523 //append required records to list_records (list is incomplete)
524 getRecords(list_identifiers, header_list, cursor, cursor + resume_after);
525 token_elem = createResumptionTokenElement(num_headers, cursor, cursor + resume_after, true);
526 //store this token
527 OAIXML.addToken(token_elem);
528 list_identifiers.appendChild(token_elem);
529 }
530
531 return getMessage(list_identifiers);
532 }//end of if(!token.equals(""))
533
534 return result;
535 }
536 private Element doListRecords(Element msg){
537 logger.info("");
538 // option: from, until, set, metadataPrefix, and resumptionToken
539 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
540 HashSet valid_strs = new HashSet();
541 valid_strs.add(OAIXML.FROM);
542 valid_strs.add(OAIXML.UNTIL);
543 valid_strs.add(OAIXML.SET);
544 valid_strs.add(OAIXML.METADATA_PREFIX);
545 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
546
547 Element list_records = OAIXML.createElement(OAIXML.LIST_RECORDS);
548 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
549 if (req == null) { logger.error("req is null"); return null; }
550 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
551
552 String coll_name = "";
553 String token = "";
554
555 if(params.getLength() == 0) {
556 logger.error("must at least have the metadataPrefix parameter, can't be none");
557 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
558 }
559
560 HashMap param_map = OAIXML.getParamMap(params);
561 if (!isValidParam(param_map, valid_strs)) {
562 logger.error("One of the params is invalid");
563 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
564 }
565 // param keys are valid, but if there are any date params, check they're of the right format
566 String from = (String)param_map.get(OAIXML.FROM);
567 if(from != null) {
568 Date from_date = OAIXML.getDate(from);
569 if(from_date == null) {
570 logger.error("invalid date: " + from);
571 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
572 }
573 }
574 String until = (String)param_map.get(OAIXML.UNTIL);
575 Date until_date = null;
576 if(until != null) {
577 until_date = OAIXML.getDate(until);
578 if(until_date == null) {
579 logger.error("invalid date: " + until);
580 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
581 }
582 }
583 if(from != null && until != null) { // check they are of the same date-time format (granularity)
584 if(from.length() != until.length()) {
585 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
586 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
587 }
588 }
589
590 //ask the message router for a list of oai collections
591 NodeList oai_coll = getOAICollectionList();
592 int oai_coll_size = oai_coll.getLength();
593 if (oai_coll_size == 0) {
594 logger.info("returned oai collection list is empty");
595 return getMessage(OAIXML.createErrorElement(OAIXML.NO_RECORDS_MATCH, ""));
596 }
597
598 //Now we check if the optional argument 'set' has been specified in the params; if so,
599 //whether the specified setSpec is supported by this repository
600 boolean request_set = param_map.containsKey(OAIXML.SET);
601 if(request_set == true) {
602 boolean set_supported = false;
603 String set_spec_str = (String)param_map.get(OAIXML.SET);
604 // get the collection name
605 //if setSpec is supported by this repository, it must be in the form: site_name:coll_name
606 String[] strs = splitSetSpec(set_spec_str);
607// name_of_site = strs[0];
608 coll_name = strs[1];
609 //logger.info("param contains set: "+coll_name);
610
611 for(int i=0; i<oai_coll_size; i++) {
612 if(set_spec_str.equals(((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME))) {
613 set_supported = true;
614 }
615 }
616 if(set_supported == false) {
617 logger.error("requested set is not found in this repository");
618 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
619 }
620 }
621
622 //Is there a resumptionToken included which is requesting an incomplete list?
623 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
624 // validate resumptionToken
625 //if (the token value is not found in the token xml file) {
626 // return getMessage(OAIXML.createErrorElement(OAIXML.BAD_RESUMPTION_TOKEN, ""));
627 //} else {
628 // use the request to get a complete list of records from the message router
629 // and issue the subsequent part of that complete list according to the token.
630 // store a new token if necessary.
631 //}
632 token = (String)param_map.get(OAIXML.RESUMPTION_TOKEN);
633 logger.info("has resumptionToken: " + token);
634 if(OAIXML.containsToken(token) == false) {
635 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_RESUMPTION_TOKEN, ""));
636 }
637 }
638
639 // Moved the additional custom test that mandates the metadataPrefix here, since official
640 // errors should be caught first, so that their error responses can be sent off first
641 // such that GS2's oaiserver will validate properly.
642 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
643 if(!token.equals("")) { // resumptiontoken
644 int lastIndex = token.lastIndexOf(":");
645 if(lastIndex != token.indexOf(":")) { // if a meta_prefix is suffixed to the usual token,
646 // put that in the map and remove it from the end of the stored token
647 String meta_prefix = token.substring(lastIndex+1);
648 param_map.put(OAIXML.METADATA_PREFIX, meta_prefix);
649 token = token.substring(0, lastIndex);
650 param_map.put(OAIXML.RESUMPTION_TOKEN, token);
651
652 // Add to request <param name="metadataPrefix" value="oai_dc"/>
653 // need to add metaprefix as param to request, else a request
654 // for subsequent records when working with resumption tokens will fail
655 Element paramEl = req.getOwnerDocument().createElement(OAIXML.PARAM);
656 paramEl.setAttribute(OAIXML.NAME, OAIXML.METADATA_PREFIX);
657 paramEl.setAttribute(OAIXML.VALUE, meta_prefix);
658 req.appendChild(paramEl);
659 }
660 } else { // no metadata_prefix
661
662 // it must have a metadataPrefix
663 /** Here I disagree with the OAI specification: even if a resumptionToken is
664 * included in the request, the metadataPrefix is a must argument. Otherwise
665 * how would we know what metadataPrefix the harvester requested in his last request?
666 */
667 logger.error("no metadataPrefix");
668 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
669 }
670 }
671
672 //Now that we got a prefix, check and see if it's supported by this repository
673 String prefix_value = (String)param_map.get(OAIXML.METADATA_PREFIX);
674 if (containsMetadataPrefix(prefix_value) == false) {
675 logger.error("requested prefix is not found in OAIConfig.xml");
676 return getMessage(OAIXML.createErrorElement(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""));
677 }
678
679
680 //Now that all validation has been done, I hope, we can send request to the message router
681 Element result = null;
682 String verb = req.getAttribute(OAIXML.TO);
683 NodeList param_list = req.getElementsByTagName(OAIXML.PARAM);
684 ArrayList retain_param_list = new ArrayList();
685 for (int j=0; j<param_list.getLength(); j++) {
686 Element e = OAIXML.duplicateElement(msg.getOwnerDocument(), (Element)param_list.item(j), true);
687 retain_param_list.add(e);
688 }
689
690 //re-organize the request element
691 // reset the 'to' attribute
692 if (request_set == false) {
693 //coll_name could be "", which means it's requesting all records of all collections
694 //we send a request to each collection asking for its records
695 for(int i=0; i<oai_coll_size; i++) {
696 if(req == null) {
697 req = msg.getOwnerDocument().createElement(GSXML.REQUEST_ELEM);
698 msg.appendChild(req);
699 for (int j=0; j<retain_param_list.size(); j++) {
700 req.appendChild((Element)retain_param_list.get(j));
701 }
702 }
703 String full_name = ((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME);
704 coll_name = full_name.substring(full_name.indexOf(":") + 1);
705 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
706 //logger.info(GSXML.xmlNodeToString(req));
707 Node n = mr.process(msg);
708 Element e = converter.nodeToElement(n);
709 result = collectAll(result, e, verb, OAIXML.RECORD);
710
711 //clear the content of the old request element
712 msg.removeChild(req);
713 req = null;
714 }
715 } else {
716 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
717
718 Node result_node = mr.process(msg);
719 result = converter.nodeToElement(result_node);
720 }
721
722 if (result == null) {
723 logger.info("message router returns null");
724 return getMessage(OAIXML.createErrorElement("Internal service returns null", ""));
725 }
726 Element res = (Element)GSXML.getChildByTagName(result, OAIXML.RESPONSE);
727 if(res == null) {
728 logger.info("response element in xml_result is null");
729 return getMessage(OAIXML.createErrorElement("Internal service returns null", ""));
730 }
731 NodeList record_list = res.getElementsByTagName(OAIXML.RECORD);
732 int num_records = record_list.getLength();
733 if(num_records == 0) {
734 logger.info("message router returns 0 records.");
735 return getMessage(OAIXML.createErrorElement(OAIXML.NO_RECORDS_MATCH, ""));
736 }
737
738 //The request coming in does not contain a token, but we have to check the resume_after value and see if we need to issue a resumption token and
739 // save the token as well.
740 if (token.equals("") == true) {
741 if(resume_after < 0 || num_records <= resume_after) {
742 //send the whole list of records
743 return result;
744 }
745
746 //append required number of records (may be a complete or incomplete list)
747 getRecords(list_records, record_list, 0, resume_after);
748 //An incomplete list is sent; append a resumptionToken element
749 Element token_elem = createResumptionTokenElement(num_records, 0, resume_after, true, (String)param_map.get(OAIXML.METADATA_PREFIX));
750 //store this token
751 OAIXML.addToken(token_elem);
752
753 list_records.appendChild(token_elem);
754 return getMessage(list_records);
755 }
756
757 if (token.equals("") == false) {
758 //get an appropriate number of records (partial list) according to the token
759 //take out the cursor value, which is the size of previously sent list
760 int index = token.indexOf(":");
761 int cursor = Integer.parseInt(token.substring(index + 1));
762 Element token_elem = null;
763
764 // are we sending the final part of a complete list?
765 if(cursor + resume_after >= num_records) {
766 //Yes, we are.
767 //append required records to list_records (list is complete)
768 getRecords(list_records, record_list, cursor, num_records);
769 //An incomplete list is sent; append a resumptionToken element
770 token_elem = createResumptionTokenElement(num_records, cursor, -1, false, (String)param_map.get(OAIXML.METADATA_PREFIX));
771 list_records.appendChild(token_elem);
772
773 } else {
774 //No, we are not.
775 //append required records to list_records (list is incomplete)
776 getRecords(list_records, record_list, cursor, cursor + resume_after);
777 token_elem = createResumptionTokenElement(num_records, cursor, cursor + resume_after, true, (String)param_map.get(OAIXML.METADATA_PREFIX));
778 //store this token
779 OAIXML.addToken(token_elem);
780 list_records.appendChild(token_elem);
781 }
782
783 return getMessage(list_records);
784 }//end of if(!token.equals(""))
785
786 return result;//a backup return
787 }
788 // method exclusively used by doListRecords/doListIdentifiers
789 private void getRecords(Element verb_elem, NodeList list, int start_point, int end_point) {
790 for (int i=start_point; i<end_point; i++) {
791 verb_elem.appendChild(verb_elem.getOwnerDocument().importNode(list.item(i), true));
792 }
793 }
794 private Element collectAll(Element result, Element msg, String verb, String elem_name) {
795 if(result == null) {
796 //in the first round, result is null
797 return msg;
798 }
799 Element res_in_result = (Element)GSXML.getChildByTagName(result, OAIXML.RESPONSE);
800 if(res_in_result == null) { // return the results of all other collections accumulated so far
801 return msg;
802 }
803 Element verb_elem = (Element)GSXML.getChildByTagName(res_in_result, verb);
804 if(msg == null) {
805 return result;
806 }
807
808 //e.g., get all <record> elements from the returned message. There may be none of
809 //such element, for example, the collection service returned an error message
810 NodeList elem_list = msg.getElementsByTagName(elem_name);
811
812 for (int i=0; i<elem_list.getLength(); i++) {
813 verb_elem.appendChild(res_in_result.getOwnerDocument().importNode(elem_list.item(i), true));
814 }
815 return result;
816 }
817 /** there are three possible exception conditions: bad argument, idDoesNotExist, and noMetadataFormats.
818 * The first one is handled here, and the last two are processed by OAIPMH.
819 */
820 private Element doListMetadataFormats(Element msg) {
821 //if the verb is ListMetadataFormats, there could be only one parameter: identifier
822 //, or there is no parameter; otherwise it is an error
823 //logger.info("" + this.converter.getString(msg));
824
825 Element list_metadata_formats = OAIXML.createElement(OAIXML.LIST_METADATA_FORMATS);
826
827 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
828 if (req == null) { logger.error(""); return null; }
829 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
830 Element param = null;
831 if(params.getLength() == 0) {
832 //this is requesting metadata formats for the whole repository
833 //read the oaiConfig.xml file, return the metadata formats specified there.
834 Element oai_config = OAIXML.getOAIConfigXML();
835 if (oai_config == null) {
836 return getMessage(OAIXML.createErrorElement(OAIXML.ERROR, OAIXML.SERVICE_UNAVAILABLE));
837 } else {
838 Element format_list = (Element)GSXML.getChildByTagName(oai_config, OAIXML.LIST_METADATA_FORMATS);
839 if(format_list == null) {
840 logger.error("OAIConfig.xml must contain the supported metadata formats");
841 return getMessage(list_metadata_formats);
842 }
843 NodeList formats = format_list.getElementsByTagName(OAIXML.METADATA_FORMAT);
844 for(int i=0; i<formats.getLength(); i++) {
845 Element meta_fmt = OAIXML.createElement(OAIXML.METADATA_FORMAT);
846 Element first_meta_format = (Element)formats.item(i);
847 //the element also contains mappings, but we don't want them
848 meta_fmt.appendChild(meta_fmt.getOwnerDocument().importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_PREFIX), true));
849 meta_fmt.appendChild(meta_fmt.getOwnerDocument().importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.SCHEMA), true));
850 meta_fmt.appendChild(meta_fmt.getOwnerDocument().importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_NAMESPACE), true));
851 list_metadata_formats.appendChild(meta_fmt);
852 }
853 return getMessage(list_metadata_formats);
854 }
855
856 } else if (params.getLength() > 1) {
857 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
858 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
859 } else {
860 // This is a request for the metadata of a particular item with an identifier
861 /**the request xml is in the form: <request>
862 * <param name=.../>
863 * </request>
864 *And there is a param element and one element only. (No paramList element in between).
865 */
866 param = (Element)params.item(0);
867 String param_name = param.getAttribute(OAIXML.NAME);
868 String identifier = "";
869 if (!param_name.equals(OAIXML.IDENTIFIER)) {
870 //Bad argument
871 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
872 } else {
873 identifier = param.getAttribute(OAIXML.VALUE);
874 // the identifier is in the form: <site_name>:<coll_name>:<OID>
875 // so it must contain at least two ':' characters
876 String[] strs = identifier.split(":");
877 if(strs == null || strs.length < 3) {
878 // the OID may also contain ':'
879 logger.error("identifier is not in the form site:coll:id" + identifier);
880 return getMessage(OAIXML.createErrorElement(OAIXML.ID_DOES_NOT_EXIST, ""));
881 }
882
883 // send request to message router
884 // get the names
885 strs = splitNames(identifier);
886 if(strs == null || strs.length < 3) {
887 logger.error("identifier is not in the form site:coll:id" + identifier);
888 return getMessage(OAIXML.createErrorElement(OAIXML.ID_DOES_NOT_EXIST, ""));
889 }
890 String name_of_site = strs[0];
891 String coll_name = strs[1];
892 String oid = strs[2];
893
894 //re-organize the request element
895 // reset the 'to' attribute
896 String verb = req.getAttribute(OAIXML.TO);
897 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
898 // reset the identifier element
899 param.setAttribute(OAIXML.NAME, OAIXML.OID);
900 param.setAttribute(OAIXML.VALUE, oid);
901
902 //Now send the request to the message router to process
903 Node result_node = mr.process(msg);
904 return converter.nodeToElement(result_node);
905 }
906 }
907
908 }
909 private void appendParam(Element req, String name, String value) {
910 Element param = req.getOwnerDocument().createElement(OAIXML.PARAM);
911 param.setAttribute(OAIXML.NAME, name);
912 param.setAttribute(OAIXML.VALUE, value);
913 req.appendChild(param);
914 }
915 private void copyElement(Element identify, String tag_name) {
916 Element from_repository_name = (Element)GSXML.getChildByTagName(oai_config, tag_name);
917 if(from_repository_name != null) {
918 Element this_repository_name = OAIXML.createElement(tag_name);
919 GSXML.setNodeText(this_repository_name, GSXML.getNodeText(from_repository_name));
920 identify.appendChild(this_repository_name);
921 }
922 }
923 private Element doIdentify() {
924 //The validation for this verb has been done in OAIServer.validate(). So no bother here.
925 logger.info("");
926
927 Element identify = OAIXML.createElement(OAIXML.IDENTIFY);
928 //do the repository name
929 copyElement(identify, OAIXML.REPOSITORY_NAME);
930 //do the baseurl
931 copyElement(identify, OAIXML.BASE_URL);
932 //do the protocol version
933 copyElement(identify, OAIXML.PROTOCOL_VERSION);
934
935 //There can be more than one admin email according to the OAI specification
936 NodeList admin_emails = GSXML.getChildrenByTagName(oai_config, OAIXML.ADMIN_EMAIL);
937 int num_admin = 0;
938 Element from_admin_email = null;
939 if (admin_emails != null) {
940 num_admin = admin_emails.getLength();
941 }
942 for (int i=0; i<num_admin; i++) {
943 copyElement(identify, OAIXML.ADMIN_EMAIL);
944 }
945
946 //do the earliestDatestamp
947 //send request to mr to search through the earliest datestamp amongst all oai collections in the repository.
948 //ask the message router for a list of oai collections
949 NodeList oai_coll = getOAICollectionList();
950 long earliestDatestamp = getEarliestDateStamp(oai_coll);
951 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
952 Element earliestDatestamp_elem = OAIXML.createElement(OAIXML.EARLIEST_DATESTAMP);
953 GSXML.setNodeText(earliestDatestamp_elem, earliestDatestamp_str);
954 identify.appendChild(earliestDatestamp_elem);
955
956 //do the deletedRecord
957 copyElement(identify, OAIXML.DELETED_RECORD);
958 //do the granularity
959 copyElement(identify, OAIXML.GRANULARITY);
960
961 return getMessage(identify);
962 }
963 //split setSpec (site_name:coll_name) into an array of strings
964 //It has already been checked that the set_spec contains at least one ':'
965 private String[] splitSetSpec(String set_spec) {
966 logger.info(set_spec);
967 String[] strs = new String[2];
968 int colon_index = set_spec.indexOf(":");
969 strs[0] = set_spec.substring(0, colon_index);
970 strs[1] = set_spec.substring(colon_index + 1);
971 return strs;
972 }
973 /** split the identifier into <site + collection + OID> as an array
974 It has already been checked that the 'identifier' contains at least two ':'
975 */
976 private String[] splitNames(String identifier) {
977 logger.info(identifier);
978 String [] strs = new String[3];
979 int first_colon = identifier.indexOf(":");
980 if(first_colon == -1) {
981 return null;
982 }
983 strs[0] = identifier.substring(0, first_colon);
984
985 String sr = identifier.substring(first_colon + 1);
986 int second_colon = sr.indexOf(":");
987 //logger.error(first_colon + " " + second_colon);
988 strs[1] = sr.substring(0, second_colon);
989
990 strs[2] = sr.substring(second_colon + 1);
991 return strs;
992 }
993 /** validate if the specified metadata prefix value is supported by the repository
994 * by checking it in the OAIConfig.xml
995 */
996 private boolean containsMetadataPrefix(String prefix_value) {
997 NodeList prefix_list = oai_config.getElementsByTagName(OAIXML.METADATA_PREFIX);
998
999 for(int i=0; i<prefix_list.getLength(); i++) {
1000 if(prefix_value.equals(GSXML.getNodeText((Element)prefix_list.item(i)).trim() )) {
1001 return true;
1002 }
1003 }
1004 return false;
1005 }
1006 private Element doGetRecord(Element msg){
1007 logger.info("");
1008 /** arguments:
1009 identifier: required
1010 metadataPrefix: required
1011 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
1012 */
1013 Element get_record = OAIXML.createElement(OAIXML.GET_RECORD);
1014
1015 HashSet valid_strs = new HashSet();
1016 valid_strs.add(OAIXML.IDENTIFIER);
1017 valid_strs.add(OAIXML.METADATA_PREFIX);
1018
1019 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
1020 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
1021 HashMap param_map = OAIXML.getParamMap(params);
1022
1023 if(!isValidParam(param_map, valid_strs) ||
1024 params.getLength() == 0 ||
1025 param_map.containsKey(OAIXML.IDENTIFIER) == false ||
1026 param_map.containsKey(OAIXML.METADATA_PREFIX) == false ) {
1027 logger.error("must have the metadataPrefix/identifier parameter.");
1028 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
1029 }
1030
1031 String prefix = (String)param_map.get(OAIXML.METADATA_PREFIX);
1032 String identifier = (String)param_map.get(OAIXML.IDENTIFIER);
1033
1034 // verify the metadata prefix
1035 if (containsMetadataPrefix(prefix) == false) {
1036 logger.error("requested prefix is not found in OAIConfig.xml");
1037 return getMessage(OAIXML.createErrorElement(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""));
1038 }
1039
1040 // get the names
1041 String[] strs = splitNames(identifier);
1042 if(strs == null || strs.length < 3) {
1043 logger.error("identifier is not in the form site:coll:id" + identifier);
1044 return getMessage(OAIXML.createErrorElement(OAIXML.ID_DOES_NOT_EXIST, ""));
1045 }
1046 String name_of_site = strs[0];
1047 String coll_name = strs[1];
1048 String oid = strs[2];
1049
1050 //re-organize the request element
1051 // reset the 'to' attribute
1052 String verb = req.getAttribute(OAIXML.TO);
1053 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
1054 // reset the identifier element
1055 Element param = GSXML.getNamedElement(req, OAIXML.PARAM, OAIXML.NAME, OAIXML.IDENTIFIER);
1056 if (param != null) {
1057 param.setAttribute(OAIXML.NAME, OAIXML.OID);
1058 param.setAttribute(OAIXML.VALUE, oid);
1059 }
1060
1061 //Now send the request to the message router to process
1062 Node result_node = mr.process(msg);
1063 return converter.nodeToElement(result_node);
1064 }
1065
1066 // See OAIConfig.xml
1067 // dynamically works out what the earliestDateStamp is, since it varies by collection
1068 // returns this time in *milliseconds*.
1069 protected long getEarliestDateStamp(NodeList oai_coll) {
1070 //do the earliestDatestamp
1071 long earliestDatestamp = System.currentTimeMillis();
1072 int oai_coll_size = oai_coll.getLength();
1073 if (oai_coll_size == 0) {
1074 logger.info("returned oai collection list is empty. Setting repository earliestDatestamp to be 1970-01-01.");
1075 earliestDatestamp = 0;
1076 }
1077 // the earliestDatestamp is now stored as a metadata element in the collection's buildConfig.xml file
1078 // we get the earliestDatestamp among the collections
1079 for(int i=0; i<oai_coll_size; i++) {
1080 long coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.EARLIEST_DATESTAMP));
1081 earliestDatestamp = (earliestDatestamp > coll_earliestDatestamp)? coll_earliestDatestamp : earliestDatestamp;
1082 }
1083
1084 return earliestDatestamp*1000; // converting from seconds to milliseconds
1085 }
1086}
Note: See TracBrowser for help on using the repository browser.