source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/core/OAIReceptionist.java@ 25635

Last change on this file since 25635 was 25635, checked in by sjm84, 12 years ago

Fixing Greenstone 3's use (or lack thereof) of generics, this was done automatically so we may want to change it over time. This change will also auto-format any files that have not already been formatted.

File size: 45.8 KB
Line 
1package org.greenstone.gsdl3.core;
2
3import org.greenstone.gsdl3.util.*;
4import org.greenstone.gsdl3.action.*;
5// XML classes
6import org.w3c.dom.Node;
7import org.w3c.dom.NodeList;
8import org.w3c.dom.Document;
9import org.w3c.dom.Element;
10
11// other java classes
12import java.io.File;
13import java.util.*;
14
15import org.apache.log4j.*;
16
17/** a Receptionist, used for oai metadata response xml generation.
18 * This receptionist talks to the message router directly,
19 * instead of via any action, hence no action map is needed.
20 * @see the basic Receptionist
21 */
22public class OAIReceptionist implements ModuleInterface {
23
24 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.core.OAIReceptionist.class.getName());
25
26 /** Instead of a config_params object, only a site_name is needed by oai receptionist. */
27 protected String site_name = null;
28 /** container Document to create XML Nodes for requests sent to message router
29 * Not used for response
30 */
31 protected Document doc=null;
32
33 /** a converter class to parse XML and create Docs */
34 protected XMLConverter converter=null;
35
36 /** the configure file of this receptionist passed from the oai servlet. */
37 protected Element oai_config = null;
38
39 /** contained in the OAIConfig.xml deciding whether the resumptionToken should be in use */
40 protected int resume_after = -1 ;
41
42 /** the message router that the Receptionist and Actions will talk to */
43 protected ModuleInterface mr = null;
44
45 public OAIReceptionist() {
46 this.converter = new XMLConverter();
47 this.doc = this.converter.newDOM();
48
49 }
50
51 public void cleanUp() {}
52
53 public void setSiteName(String site_name) {
54 this.site_name = site_name;
55 }
56 /** sets the message router - it should already be created and
57 * configured in the init() of a servlet (OAIServer, for example) before being passed to the receptionist*/
58 public void setMessageRouter(ModuleInterface mr) {
59 this.mr = mr;
60 }
61
62 /** configures the receptionist */
63 public boolean configure(Element config) {
64
65 if (this.mr==null) {
66 logger.error(" message routers must be set before calling oai configure");
67 return false;
68 }
69 if (config == null) {
70 logger.error(" oai configure file is null");
71 return false;
72 }
73 oai_config = config;
74 resume_after = getResumeAfter();
75
76 //clear out expired resumption tokens stored in OAIResumptionToken.xml
77 OAIXML.init();
78 OAIXML.clearExpiredTokens();
79
80 return true;
81 }
82 /** process using strings - just calls process using Elements */
83 public String process(String xml_in) {
84
85 Node message_node = this.converter.getDOM(xml_in);
86 Node page = process(message_node);
87 return this.converter.getString(page);
88 }
89
90 //Compose a message element used to send back to the OAIServer servlet.
91 //This method is only used within OAIReceptionist
92 private Element getMessage(Element e) {
93 Element msg = OAIXML.createElement(OAIXML.MESSAGE);
94 msg.appendChild(OAIXML.getResponse(e));
95 return msg;
96 }
97 /** process - produce xml data in response to a request
98 * if something goes wrong, it returns null -
99 */
100 public Node process(Node message_node) {
101 logger.error("OAIReceptionist received request");
102
103 Element message = this.converter.nodeToElement(message_node);
104 logger.error(this.converter.getString(message));
105
106 // check that its a correct message tag
107 if (!message.getTagName().equals(GSXML.MESSAGE_ELEM)) {
108 logger.error(" Invalid message. GSDL message should start with <"+GSXML.MESSAGE_ELEM+">, instead it starts with:"+message.getTagName()+".");
109 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
110 }
111
112 // get the request out of the message - assume that there is only one
113 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
114 if (request == null) {
115 logger.error(" message had no request!");
116 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
117 }
118 //At this stage, the value of 'to' attribute of the request must be the 'verb'
119 //The only thing that the oai receptionist can be sure is that these verbs are valid, nothing else.
120 String verb = request.getAttribute(GSXML.TO_ATT);
121 if (verb.equals(OAIXML.IDENTIFY)) {
122 return doIdentify();
123 }
124 if (verb.equals(OAIXML.LIST_METADATA_FORMATS)) {
125 return doListMetadataFormats(message);
126 }
127 if (verb.equals(OAIXML.LIST_SETS)) {
128 return doListSets(message);
129 }
130 if (verb.equals(OAIXML.GET_RECORD)) {
131 return doGetRecord(message);
132 }
133 if (verb.equals(OAIXML.LIST_IDENTIFIERS)) {
134 return doListIdentifiers(message);
135 }
136 if (verb.equals(OAIXML.LIST_RECORDS)) {
137 return doListRecords(message);
138 }
139 return getMessage(OAIXML.createErrorElement("Unexpected things happened", ""));
140
141 }
142 /** send a request to the message router asking for a list of collections that support oai
143 * The type attribute must be changed from 'oaiService' to 'oaiSetList'
144 */
145 private NodeList getOAICollectionList() {
146 Element message = this.doc.createElement(OAIXML.MESSAGE);
147 Element request = this.doc.createElement(OAIXML.REQUEST);
148 message.appendChild(request);
149 request.setAttribute(OAIXML.TYPE, OAIXML.OAI_SET_LIST);
150 request.setAttribute(OAIXML.TO, "");
151 Node msg_node = mr.process(message);
152
153 if (msg_node == null) {
154 logger.error("returned msg_node from mr is null");
155 return null;
156 }
157 Element resp = (Element)GSXML.getChildByTagName(msg_node, OAIXML.RESPONSE);
158 Element coll_list = (Element)GSXML.getChildByTagName(resp, OAIXML.COLLECTION_LIST);
159 if (coll_list == null) {
160 logger.error("coll_list is null");
161 return null;
162 }
163 //logger.info(GSXML.xmlNodeToString(coll_list));
164 NodeList list = coll_list.getElementsByTagName(OAIXML.COLLECTION);
165 int length = list.getLength();
166 if (length == 0) {
167 logger.error("length is 0");
168 return null;
169 }
170 return list;
171 }
172 /**Exclusively called by doListSets()*/
173 private void getSets(Element list_sets_elem, NodeList oai_coll, int start_point, int end_point) {
174 for (int i=start_point; i<end_point; i++) {
175 String coll_spec = ((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME);
176 String coll_name = coll_spec.substring(coll_spec.indexOf(":") + 1);
177 Element set = OAIXML.createElement(OAIXML.SET);
178 Element set_spec = OAIXML.createElement(OAIXML.SET_SPEC);
179 GSXML.setNodeText(set_spec, coll_spec);
180 set.appendChild(set_spec);
181 Element set_name = OAIXML.createElement(OAIXML.SET_NAME);
182 GSXML.setNodeText(set_name, coll_name);
183 set.appendChild(set_name);
184 list_sets_elem.appendChild(set);
185 }
186 }
187 private int getResumeAfter() {
188 Element resume_after = (Element)GSXML.getChildByTagName(oai_config, OAIXML.RESUME_AFTER);
189 if(resume_after != null) return Integer.parseInt(GSXML.getNodeText(resume_after));
190 return -1;
191 }
192 /** method to compose a set element
193 */
194 private Element doListSets(Element msg){
195 logger.info("");
196 // option: resumptionToken
197 // exceptions: badArgument, badResumptionToken, noSetHierarchy
198 Element list_sets_elem = OAIXML.createElement(OAIXML.LIST_SETS);
199
200 //ask the message router for a list of oai collections
201 NodeList oai_coll = getOAICollectionList();
202 int oai_coll_size = oai_coll.getLength();
203 if (oai_coll_size == 0) {
204 return getMessage(list_sets_elem);
205 }
206
207 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
208 if (req == null) {
209 logger.error("req is null");
210 return null;
211 }
212 //params list only contains the parameters other than the verb
213 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
214 Element param = null;
215 int smaller = (oai_coll_size>resume_after)? resume_after : oai_coll_size;
216 if (params.getLength() > 1) {
217 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
218 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
219 }
220 if(params.getLength() == 0) {
221 //this is requesting a list of sets in the whole repository
222 /** there is no resumeptionToken in the request, we check whether we need
223 * to send out resumeptionToken by comparing the total number of sets in this
224 * repository and the specified value of resumeAfter
225 */
226 if(resume_after < 0 || oai_coll_size <= resume_after) {
227 //send the whole list of records
228 //all data are sent on the first request. Therefore there should be
229 //no resumeptionToken stored in OAIConfig.xml.
230 //As long as the verb is 'ListSets', we ignore the rest of the parameters
231 getSets(list_sets_elem, oai_coll, 0, oai_coll_size);
232 return getMessage(list_sets_elem);
233 }
234
235 //append required sets to list_sets_elem (may be a complete or incomplete list)
236 getSets(list_sets_elem, oai_coll, 0, smaller);
237
238 if(oai_coll_size > resume_after) {
239 //An incomplete list is sent; append a resumptionToken element
240 Element token = createResumptionTokenElement(oai_coll_size, 0, resume_after, true);
241 //store this token
242 OAIXML.addToken(token);
243
244 list_sets_elem.appendChild(token);
245 }
246
247 return getMessage(list_sets_elem);
248 }
249
250 // The url should contain only one param called resumptionToken
251 // This is requesting a subsequent part of a list, following a previously sent incomplete list
252 param = (Element)params.item(0);
253 String param_name = param.getAttribute(OAIXML.NAME);
254 if (!param_name.equals(OAIXML.RESUMPTION_TOKEN)) {
255 //Bad argument
256 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
257 }
258 //get the token
259 String token = param.getAttribute(OAIXML.VALUE);
260 //validate the token string (the string has already been decoded in OAIServer, e.g.,
261 // replace %3A with ':')
262 if(OAIXML.containsToken(token) == false) {
263 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_RESUMPTION_TOKEN, ""));
264 }
265 //take out the cursor value, which is the size of previously sent list
266 int index = token.indexOf(":");
267 int cursor = Integer.parseInt(token.substring(index + 1));
268 Element token_elem = null;
269
270 // are we sending the final part of a complete list?
271 if(cursor + resume_after >= oai_coll_size) {
272 //Yes, we are.
273 //append required sets to list_sets_elem (list is complete)
274 getSets(list_sets_elem, oai_coll, cursor, oai_coll_size);
275 //An incomplete list is sent; append a resumptionToken element
276 token_elem = createResumptionTokenElement(oai_coll_size, cursor, -1, false);
277 list_sets_elem.appendChild(token_elem);
278 } else {
279 //No, we are not.
280 //append required sets to list_sets_elem (list is incomplete)
281 getSets(list_sets_elem, oai_coll, cursor, cursor + resume_after);
282 token_elem = createResumptionTokenElement(oai_coll_size, cursor, cursor + resume_after, true);
283 //store this token
284 OAIXML.addToken(token_elem);
285 list_sets_elem.appendChild(token_elem);
286 }
287 return getMessage(list_sets_elem);
288 }
289 private Element createResumptionTokenElement(int total_size, int cursor, int so_far_sent, boolean set_expiration, String metadata_prefix) {
290 Element token = OAIXML.createElement(OAIXML.RESUMPTION_TOKEN);
291 token.setAttribute(OAIXML.COMPLETE_LIST_SIZE, "" + total_size);
292 token.setAttribute(OAIXML.CURSOR, "" + cursor);
293
294 if(set_expiration) {
295 /** read the resumptionTokenExpiration element in OAIConfig.xml and get the specified time value
296 * Use the time value plus the current system time to get the expiration date string.
297 */
298 String expiration_date = OAIXML.getTime(System.currentTimeMillis() + OAIXML.getTokenExpiration()); // in milliseconds
299 token.setAttribute(OAIXML.EXPIRATION_DATE, expiration_date);
300 }
301
302 if(so_far_sent > 0) {
303 //the format of resumptionToken is not defined by the OAI-PMH and should be
304 //considered opaque by the harvester (in other words, strictly follow what the
305 //data provider has to offer
306 //Here, we make use of the uniqueness of the system time
307 String tokenValue = OAIXML.GS3OAI + System.currentTimeMillis() + ":" + so_far_sent;
308 if(!metadata_prefix.equals("")) {
309 tokenValue = tokenValue + ":" + metadata_prefix;
310 }
311 GSXML.setNodeText(token, tokenValue);
312 }
313 return token;
314 }
315
316 private Element createResumptionTokenElement(int total_size, int cursor, int so_far_sent, boolean set_expiration) {
317 return createResumptionTokenElement(total_size, cursor, so_far_sent, set_expiration, ""); // empty metadata_prefix
318 }
319
320 /** if the param_map contains strings other than those in valid_strs, return false;
321 * otherwise true.
322 */
323 private boolean isValidParam(HashMap<String, String> param_map, HashSet<String> valid_strs) {
324 ArrayList<String> param_list = new ArrayList<String>(param_map.keySet());
325 for(int i=0; i<param_list.size(); i++) {
326 if (valid_strs.contains(param_list.get(i)) == false) {
327 return false;
328 }
329 }
330 return true;
331 }
332 private Element doListIdentifiers(Element msg) {
333 // option: from, until, set, metadataPrefix, resumptionToken
334 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
335 HashSet<String> valid_strs = new HashSet<String>();
336 valid_strs.add(OAIXML.FROM);
337 valid_strs.add(OAIXML.UNTIL);
338 valid_strs.add(OAIXML.SET);
339 valid_strs.add(OAIXML.METADATA_PREFIX);
340 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
341
342 Element list_identifiers = OAIXML.createElement(OAIXML.LIST_IDENTIFIERS);
343 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
344 if (req == null) { logger.error("req is null"); return null; }
345 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
346 String coll_name = "";
347 String token = "";
348
349 HashMap<String, String> param_map = OAIXML.getParamMap(params);
350 if (!isValidParam(param_map, valid_strs)) {
351 logger.error("One of the params is invalid");
352 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
353 }
354 // param keys are valid, but if there are any date params, check they're of the right format
355 String from = param_map.get(OAIXML.FROM);
356 if(from != null) {
357 Date from_date = OAIXML.getDate(from);
358 if(from_date == null) {
359 logger.error("invalid date: " + from);
360 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
361 }
362 }
363 String until = param_map.get(OAIXML.UNTIL);
364 if(until != null) {
365 Date until_date = OAIXML.getDate(until);
366 if(until_date == null) {
367 logger.error("invalid date: " + until);
368 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
369 }
370 }
371 if(from != null && until != null) { // check they are of the same date-time format (granularity)
372 if(from.length() != until.length()) {
373 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
374 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
375 }
376 }
377
378 //ask the message router for a list of oai collections
379 NodeList oai_coll = getOAICollectionList();
380 int oai_coll_size = oai_coll.getLength();
381 if (oai_coll_size == 0) {
382 logger.info("returned oai collection list is empty");
383 return getMessage(OAIXML.createErrorElement(OAIXML.NO_RECORDS_MATCH, ""));
384 }
385
386 //Now we check if the optional argument 'set' has been specified in the params; if so,
387 //whether the specified setSpec is supported by this repository
388 boolean request_set = param_map.containsKey(OAIXML.SET);
389 if(request_set == true) {
390 boolean set_supported = false;
391 String set_spec_str = param_map.get(OAIXML.SET);
392 // get the collection name
393 //if setSpec is supported by this repository, it must be in the form: site_name:coll_name
394 String[] strs = splitSetSpec(set_spec_str);
395 coll_name = strs[1];
396
397 for(int i=0; i<oai_coll_size; i++) {
398 if(set_spec_str.equals(((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME))) {
399 set_supported = true;
400 }
401 }
402 if(set_supported == false) {
403 logger.error("requested set is not found in this repository");
404 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
405 }
406 }
407
408 //Is there a resumptionToken included which is requesting an incomplete list?
409 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
410 // validate resumptionToken
411 token = param_map.get(OAIXML.RESUMPTION_TOKEN);
412 logger.info("has resumptionToken" + token);
413 if(OAIXML.containsToken(token) == false) {
414 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_RESUMPTION_TOKEN, ""));
415 }
416 }
417
418 // Custom test that expects a metadataPrefix comes here at end so that the official params can
419 // be tested first for errors and their error responses sent off. Required for OAI validation
420 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
421 logger.error("contains invalid params or no metadataPrefix");
422 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
423 }
424
425 //Now that we got a prefix, check and see if it's supported by this repository
426 String prefix_value = param_map.get(OAIXML.METADATA_PREFIX);
427 if (containsMetadataPrefix(prefix_value) == false) {
428 logger.error("requested prefix is not found in OAIConfig.xml");
429 return getMessage(OAIXML.createErrorElement(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""));
430 }
431
432 //Now that all validation has been done, I hope, we can send request to the message router
433 Element result = null;
434 String verb = req.getAttribute(OAIXML.TO);
435 NodeList param_list = req.getElementsByTagName(OAIXML.PARAM);
436 ArrayList<Element> retain_param_list = new ArrayList<Element>();
437 for (int j=0; j<param_list.getLength(); j++) {
438 Element e = OAIXML.duplicateElement(msg.getOwnerDocument(), (Element)param_list.item(j), true);
439 retain_param_list.add(e);
440 }
441
442 //re-organize the request element
443 // reset the 'to' attribute
444 if (request_set == false) {
445 logger.info("requesting identifiers of all collections");
446 for(int i=0; i<oai_coll_size; i++) {
447 if(req == null) {
448 req = msg.getOwnerDocument().createElement(GSXML.REQUEST_ELEM);
449 msg.appendChild(req);
450 for (int j=0; j<retain_param_list.size(); j++) {
451 req.appendChild(retain_param_list.get(j));
452 }
453 }
454 String full_name = ((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME);
455 coll_name = full_name.substring(full_name.indexOf(":") + 1);
456 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
457 Node n = mr.process(msg);
458 Element e = converter.nodeToElement(n);
459 result = collectAll(result, e, verb, OAIXML.HEADER);
460
461 //clear the content of the old request element
462 msg.removeChild(req);
463 req = null;
464 }
465 } else {
466 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
467 Node result_node = mr.process(msg);
468 result = converter.nodeToElement(result_node);
469 }
470
471 if (result == null) {
472 logger.info("message router returns null");
473 return getMessage(OAIXML.createErrorElement("Internal service returns null", ""));
474 }
475 Element res = (Element)GSXML.getChildByTagName(result, OAIXML.RESPONSE);
476 if(res == null) {
477 logger.info("response element in xml_result is null");
478 return getMessage(OAIXML.createErrorElement("Internal service returns null", ""));
479 }
480 NodeList header_list = res.getElementsByTagName(OAIXML.HEADER);
481 int num_headers = header_list.getLength();
482 if(num_headers == 0) {
483 logger.info("message router returns 0 headers.");
484 return getMessage(OAIXML.createErrorElement(OAIXML.NO_RECORDS_MATCH, ""));
485 }
486
487 //The request coming in does not contain a token, but we have to check the resume_after value and see if we need to issue a resumption token and
488 // save the token as well.
489 if (token.equals("") == true) {
490 if(resume_after < 0 || num_headers <= resume_after) {
491 //send the whole list of records
492 return result;
493 }
494
495 //append required number of records (may be a complete or incomplete list)
496 getRecords(list_identifiers, header_list, 0, resume_after);
497 //An incomplete list is sent; append a resumptionToken element
498 Element token_elem = createResumptionTokenElement(num_headers, 0, resume_after, true);
499 //store this token
500 OAIXML.addToken(token_elem);
501
502 list_identifiers.appendChild(token_elem);
503 return getMessage(list_identifiers);
504 }
505
506 if (token.equals("") == false) {
507 //get an appropriate number of records (partial list) according to the token
508 //take out the cursor value, which is the size of previously sent list
509 int index = token.indexOf(":");
510 int cursor = Integer.parseInt(token.substring(index + 1));
511 Element token_elem = null;
512
513 // are we sending the final part of a complete list?
514 if(cursor + resume_after >= num_headers) {
515 //Yes, we are.
516 //append required records to list_records (list is complete)
517 getRecords(list_identifiers, header_list, cursor, num_headers);
518 //An incomplete list is sent; append a resumptionToken element
519 token_elem = createResumptionTokenElement(num_headers, cursor, -1, false);
520 list_identifiers.appendChild(token_elem);
521 } else {
522 //No, we are not.
523 //append required records to list_records (list is incomplete)
524 getRecords(list_identifiers, header_list, cursor, cursor + resume_after);
525 token_elem = createResumptionTokenElement(num_headers, cursor, cursor + resume_after, true);
526 //store this token
527 OAIXML.addToken(token_elem);
528 list_identifiers.appendChild(token_elem);
529 }
530
531 return getMessage(list_identifiers);
532 }//end of if(!token.equals(""))
533
534 return result;
535 }
536 private Element doListRecords(Element msg){
537 logger.info("");
538 // option: from, until, set, metadataPrefix, and resumptionToken
539 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
540 HashSet<String> valid_strs = new HashSet<String>();
541 valid_strs.add(OAIXML.FROM);
542 valid_strs.add(OAIXML.UNTIL);
543 valid_strs.add(OAIXML.SET);
544 valid_strs.add(OAIXML.METADATA_PREFIX);
545 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
546
547 Element list_records = OAIXML.createElement(OAIXML.LIST_RECORDS);
548 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
549 if (req == null) { logger.error("req is null"); return null; }
550 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
551
552 String coll_name = "";
553 String token = "";
554
555 if(params.getLength() == 0) {
556 logger.error("must at least have the metadataPrefix parameter, can't be none");
557 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
558 }
559
560 HashMap<String, String> param_map = OAIXML.getParamMap(params);
561 if (!isValidParam(param_map, valid_strs)) {
562 logger.error("One of the params is invalid");
563 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
564 }
565 // param keys are valid, but if there are any date params, check they're of the right format
566 String from = param_map.get(OAIXML.FROM);
567 if(from != null) {
568 Date from_date = OAIXML.getDate(from);
569 if(from_date == null) {
570 logger.error("invalid date: " + from);
571 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
572 }
573 }
574 String until = param_map.get(OAIXML.UNTIL);
575 Date until_date = null;
576 if(until != null) {
577 until_date = OAIXML.getDate(until);
578 if(until_date == null) {
579 logger.error("invalid date: " + until);
580 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
581 }
582 }
583 if(from != null && until != null) { // check they are of the same date-time format (granularity)
584 if(from.length() != until.length()) {
585 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
586 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
587 }
588 }
589
590 //ask the message router for a list of oai collections
591 NodeList oai_coll = getOAICollectionList();
592 int oai_coll_size = oai_coll.getLength();
593 if (oai_coll_size == 0) {
594 logger.info("returned oai collection list is empty");
595 return getMessage(OAIXML.createErrorElement(OAIXML.NO_RECORDS_MATCH, ""));
596 }
597
598 //Now we check if the optional argument 'set' has been specified in the params; if so,
599 //whether the specified setSpec is supported by this repository
600 boolean request_set = param_map.containsKey(OAIXML.SET);
601 if(request_set == true) {
602 boolean set_supported = false;
603 String set_spec_str = param_map.get(OAIXML.SET);
604 // get the collection name
605 //if setSpec is supported by this repository, it must be in the form: site_name:coll_name
606 String[] strs = splitSetSpec(set_spec_str);
607// name_of_site = strs[0];
608 coll_name = strs[1];
609 //logger.info("param contains set: "+coll_name);
610
611 for(int i=0; i<oai_coll_size; i++) {
612 if(set_spec_str.equals(((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME))) {
613 set_supported = true;
614 }
615 }
616 if(set_supported == false) {
617 logger.error("requested set is not found in this repository");
618 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
619 }
620 }
621
622 //Is there a resumptionToken included which is requesting an incomplete list?
623 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
624 // validate resumptionToken
625 //if (the token value is not found in the token xml file) {
626 // return getMessage(OAIXML.createErrorElement(OAIXML.BAD_RESUMPTION_TOKEN, ""));
627 //} else {
628 // use the request to get a complete list of records from the message router
629 // and issue the subsequent part of that complete list according to the token.
630 // store a new token if necessary.
631 //}
632 token = param_map.get(OAIXML.RESUMPTION_TOKEN);
633 logger.info("has resumptionToken: " + token);
634 if(OAIXML.containsToken(token) == false) {
635 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_RESUMPTION_TOKEN, ""));
636 }
637 }
638
639 // Moved the additional custom test that mandates the metadataPrefix here, since official
640 // errors should be caught first, so that their error responses can be sent off first
641 // such that GS2's oaiserver will validate properly.
642 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
643 if(!token.equals("")) { // resumptiontoken
644 int lastIndex = token.lastIndexOf(":");
645 if(lastIndex != token.indexOf(":")) { // if a meta_prefix is suffixed to the usual token,
646 // put that in the map and remove it from the end of the stored token
647 String meta_prefix = token.substring(lastIndex+1);
648 param_map.put(OAIXML.METADATA_PREFIX, meta_prefix);
649 token = token.substring(0, lastIndex);
650 param_map.put(OAIXML.RESUMPTION_TOKEN, token);
651
652 // Add to request <param name="metadataPrefix" value="oai_dc"/>
653 // need to add metaprefix as param to request, else a request
654 // for subsequent records when working with resumption tokens will fail
655 Element paramEl = req.getOwnerDocument().createElement(OAIXML.PARAM);
656 paramEl.setAttribute(OAIXML.NAME, OAIXML.METADATA_PREFIX);
657 paramEl.setAttribute(OAIXML.VALUE, meta_prefix);
658 req.appendChild(paramEl);
659 }
660 } else { // no metadata_prefix
661
662 // it must have a metadataPrefix
663 /** Here I disagree with the OAI specification: even if a resumptionToken is
664 * included in the request, the metadataPrefix is a must argument. Otherwise
665 * how would we know what metadataPrefix the harvester requested in his last request?
666 */
667 logger.error("no metadataPrefix");
668 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
669 }
670 }
671
672 //Now that we got a prefix, check and see if it's supported by this repository
673 String prefix_value = param_map.get(OAIXML.METADATA_PREFIX);
674 if (containsMetadataPrefix(prefix_value) == false) {
675 logger.error("requested prefix is not found in OAIConfig.xml");
676 return getMessage(OAIXML.createErrorElement(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""));
677 }
678
679
680 //Now that all validation has been done, I hope, we can send request to the message router
681 Element result = null;
682 String verb = req.getAttribute(OAIXML.TO);
683 NodeList param_list = req.getElementsByTagName(OAIXML.PARAM);
684 ArrayList<Element> retain_param_list = new ArrayList<Element>();
685 for (int j=0; j<param_list.getLength(); j++) {
686 Element e = OAIXML.duplicateElement(msg.getOwnerDocument(), (Element)param_list.item(j), true);
687 retain_param_list.add(e);
688 }
689
690 //re-organize the request element
691 // reset the 'to' attribute
692 if (request_set == false) {
693 //coll_name could be "", which means it's requesting all records of all collections
694 //we send a request to each collection asking for its records
695 for(int i=0; i<oai_coll_size; i++) {
696 if(req == null) {
697 req = msg.getOwnerDocument().createElement(GSXML.REQUEST_ELEM);
698 msg.appendChild(req);
699 for (int j=0; j<retain_param_list.size(); j++) {
700 req.appendChild(retain_param_list.get(j));
701 }
702 }
703 String full_name = ((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME);
704 coll_name = full_name.substring(full_name.indexOf(":") + 1);
705 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
706 //logger.info(GSXML.xmlNodeToString(req));
707 Node n = mr.process(msg);
708 Element e = converter.nodeToElement(n);
709 result = collectAll(result, e, verb, OAIXML.RECORD);
710
711 //clear the content of the old request element
712 msg.removeChild(req);
713 req = null;
714 }
715 } else {
716 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
717
718 Node result_node = mr.process(msg);
719 result = converter.nodeToElement(result_node);
720 }
721
722 if (result == null) {
723 logger.info("message router returns null");
724 return getMessage(OAIXML.createErrorElement("Internal service returns null", ""));
725 }
726 Element res = (Element)GSXML.getChildByTagName(result, OAIXML.RESPONSE);
727 if(res == null) {
728 logger.info("response element in xml_result is null");
729 return getMessage(OAIXML.createErrorElement("Internal service returns null", ""));
730 }
731 NodeList record_list = res.getElementsByTagName(OAIXML.RECORD);
732 int num_records = record_list.getLength();
733 if(num_records == 0) {
734 logger.info("message router returns 0 records.");
735 return getMessage(OAIXML.createErrorElement(OAIXML.NO_RECORDS_MATCH, ""));
736 }
737
738 //The request coming in does not contain a token, but we have to check the resume_after value and see if we need to issue a resumption token and
739 // save the token as well.
740 if (token.equals("") == true) {
741 if(resume_after < 0 || num_records <= resume_after) {
742 //send the whole list of records
743 return result;
744 }
745
746 //append required number of records (may be a complete or incomplete list)
747 getRecords(list_records, record_list, 0, resume_after);
748 //An incomplete list is sent; append a resumptionToken element
749 Element token_elem = createResumptionTokenElement(num_records, 0, resume_after, true, param_map.get(OAIXML.METADATA_PREFIX));
750 //store this token
751 OAIXML.addToken(token_elem);
752
753 list_records.appendChild(token_elem);
754 return getMessage(list_records);
755 }
756
757 if (token.equals("") == false) {
758 //get an appropriate number of records (partial list) according to the token
759 //take out the cursor value, which is the size of previously sent list
760 int index = token.indexOf(":");
761 int cursor = Integer.parseInt(token.substring(index + 1));
762 Element token_elem = null;
763
764 // are we sending the final part of a complete list?
765 if(cursor + resume_after >= num_records) {
766 //Yes, we are.
767 //append required records to list_records (list is complete)
768 getRecords(list_records, record_list, cursor, num_records);
769 //An incomplete list is sent; append a resumptionToken element
770 token_elem = createResumptionTokenElement(num_records, cursor, -1, false, param_map.get(OAIXML.METADATA_PREFIX));
771 list_records.appendChild(token_elem);
772
773 } else {
774 //No, we are not.
775 //append required records to list_records (list is incomplete)
776 getRecords(list_records, record_list, cursor, cursor + resume_after);
777 token_elem = createResumptionTokenElement(num_records, cursor, cursor + resume_after, true, param_map.get(OAIXML.METADATA_PREFIX));
778 //store this token
779 OAIXML.addToken(token_elem);
780 list_records.appendChild(token_elem);
781 }
782
783 return getMessage(list_records);
784 }//end of if(!token.equals(""))
785
786 return result;//a backup return
787 }
788 // method exclusively used by doListRecords/doListIdentifiers
789 private void getRecords(Element verb_elem, NodeList list, int start_point, int end_point) {
790 for (int i=start_point; i<end_point; i++) {
791 verb_elem.appendChild(verb_elem.getOwnerDocument().importNode(list.item(i), true));
792 }
793 }
794 private Element collectAll(Element result, Element msg, String verb, String elem_name) {
795 if(result == null) {
796 //in the first round, result is null
797 return msg;
798 }
799 Element res_in_result = (Element)GSXML.getChildByTagName(result, OAIXML.RESPONSE);
800 if(res_in_result == null) { // return the results of all other collections accumulated so far
801 return msg;
802 }
803 Element verb_elem = (Element)GSXML.getChildByTagName(res_in_result, verb);
804 if(msg == null) {
805 return result;
806 }
807
808 //e.g., get all <record> elements from the returned message. There may be none of
809 //such element, for example, the collection service returned an error message
810 NodeList elem_list = msg.getElementsByTagName(elem_name);
811
812 for (int i=0; i<elem_list.getLength(); i++) {
813 verb_elem.appendChild(res_in_result.getOwnerDocument().importNode(elem_list.item(i), true));
814 }
815 return result;
816 }
817 /** there are three possible exception conditions: bad argument, idDoesNotExist, and noMetadataFormats.
818 * The first one is handled here, and the last two are processed by OAIPMH.
819 */
820 private Element doListMetadataFormats(Element msg) {
821 //if the verb is ListMetadataFormats, there could be only one parameter: identifier
822 //, or there is no parameter; otherwise it is an error
823 //logger.info("" + this.converter.getString(msg));
824
825 Element list_metadata_formats = OAIXML.createElement(OAIXML.LIST_METADATA_FORMATS);
826
827 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
828 if (req == null) { logger.error(""); return null; }
829 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
830 Element param = null;
831 if(params.getLength() == 0) {
832 //this is requesting metadata formats for the whole repository
833 //read the oaiConfig.xml file, return the metadata formats specified there.
834 Element oai_config = OAIXML.getOAIConfigXML();
835 if (oai_config == null) {
836 return getMessage(OAIXML.createErrorElement(OAIXML.ERROR, OAIXML.SERVICE_UNAVAILABLE));
837 } else {
838 Element format_list = (Element)GSXML.getChildByTagName(oai_config, OAIXML.LIST_METADATA_FORMATS);
839 if(format_list == null) {
840 logger.error("OAIConfig.xml must contain the supported metadata formats");
841 return getMessage(list_metadata_formats);
842 }
843 NodeList formats = format_list.getElementsByTagName(OAIXML.METADATA_FORMAT);
844 for(int i=0; i<formats.getLength(); i++) {
845 Element meta_fmt = OAIXML.createElement(OAIXML.METADATA_FORMAT);
846 Element first_meta_format = (Element)formats.item(i);
847 //the element also contains mappings, but we don't want them
848 meta_fmt.appendChild(meta_fmt.getOwnerDocument().importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_PREFIX), true));
849 meta_fmt.appendChild(meta_fmt.getOwnerDocument().importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.SCHEMA), true));
850 meta_fmt.appendChild(meta_fmt.getOwnerDocument().importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_NAMESPACE), true));
851 list_metadata_formats.appendChild(meta_fmt);
852 }
853 return getMessage(list_metadata_formats);
854 }
855
856 } else if (params.getLength() > 1) {
857 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
858 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
859 } else {
860 // This is a request for the metadata of a particular item with an identifier
861 /**the request xml is in the form: <request>
862 * <param name=.../>
863 * </request>
864 *And there is a param element and one element only. (No paramList element in between).
865 */
866 param = (Element)params.item(0);
867 String param_name = param.getAttribute(OAIXML.NAME);
868 String identifier = "";
869 if (!param_name.equals(OAIXML.IDENTIFIER)) {
870 //Bad argument
871 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
872 } else {
873 identifier = param.getAttribute(OAIXML.VALUE);
874 // the identifier is in the form: <site_name>:<coll_name>:<OID>
875 // so it must contain at least two ':' characters
876 String[] strs = identifier.split(":");
877 if(strs == null || strs.length < 3) {
878 // the OID may also contain ':'
879 logger.error("identifier is not in the form site:coll:id" + identifier);
880 return getMessage(OAIXML.createErrorElement(OAIXML.ID_DOES_NOT_EXIST, ""));
881 }
882
883 // send request to message router
884 // get the names
885 strs = splitNames(identifier);
886 if(strs == null || strs.length < 3) {
887 logger.error("identifier is not in the form site:coll:id" + identifier);
888 return getMessage(OAIXML.createErrorElement(OAIXML.ID_DOES_NOT_EXIST, ""));
889 }
890 String name_of_site = strs[0];
891 String coll_name = strs[1];
892 String oid = strs[2];
893
894 //re-organize the request element
895 // reset the 'to' attribute
896 String verb = req.getAttribute(OAIXML.TO);
897 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
898 // reset the identifier element
899 param.setAttribute(OAIXML.NAME, OAIXML.OID);
900 param.setAttribute(OAIXML.VALUE, oid);
901
902 //Now send the request to the message router to process
903 Node result_node = mr.process(msg);
904 return converter.nodeToElement(result_node);
905 }
906 }
907
908 }
909 private void appendParam(Element req, String name, String value) {
910 Element param = req.getOwnerDocument().createElement(OAIXML.PARAM);
911 param.setAttribute(OAIXML.NAME, name);
912 param.setAttribute(OAIXML.VALUE, value);
913 req.appendChild(param);
914 }
915 private void copyElement(Element identify, String tag_name) {
916 Element from_repository_name = (Element)GSXML.getChildByTagName(oai_config, tag_name);
917 if(from_repository_name != null) {
918 Element this_repository_name = OAIXML.createElement(tag_name);
919 GSXML.setNodeText(this_repository_name, GSXML.getNodeText(from_repository_name));
920 identify.appendChild(this_repository_name);
921 }
922 }
923 private Element doIdentify() {
924 //The validation for this verb has been done in OAIServer.validate(). So no bother here.
925 logger.info("");
926
927 Element identify = OAIXML.createElement(OAIXML.IDENTIFY);
928 //do the repository name
929 copyElement(identify, OAIXML.REPOSITORY_NAME);
930 //do the baseurl
931 copyElement(identify, OAIXML.BASE_URL);
932 //do the protocol version
933 copyElement(identify, OAIXML.PROTOCOL_VERSION);
934
935 //There can be more than one admin email according to the OAI specification
936 NodeList admin_emails = GSXML.getChildrenByTagName(oai_config, OAIXML.ADMIN_EMAIL);
937 int num_admin = 0;
938 Element from_admin_email = null;
939 if (admin_emails != null) {
940 num_admin = admin_emails.getLength();
941 }
942 for (int i=0; i<num_admin; i++) {
943 copyElement(identify, OAIXML.ADMIN_EMAIL);
944 }
945
946 //do the earliestDatestamp
947 //send request to mr to search through the earliest datestamp amongst all oai collections in the repository.
948 //ask the message router for a list of oai collections
949 NodeList oai_coll = getOAICollectionList();
950 long earliestDatestamp = getEarliestDateStamp(oai_coll);
951 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
952 Element earliestDatestamp_elem = OAIXML.createElement(OAIXML.EARLIEST_DATESTAMP);
953 GSXML.setNodeText(earliestDatestamp_elem, earliestDatestamp_str);
954 identify.appendChild(earliestDatestamp_elem);
955
956 //do the deletedRecord
957 copyElement(identify, OAIXML.DELETED_RECORD);
958 //do the granularity
959 copyElement(identify, OAIXML.GRANULARITY);
960
961 return getMessage(identify);
962 }
963 //split setSpec (site_name:coll_name) into an array of strings
964 //It has already been checked that the set_spec contains at least one ':'
965 private String[] splitSetSpec(String set_spec) {
966 logger.info(set_spec);
967 String[] strs = new String[2];
968 int colon_index = set_spec.indexOf(":");
969 strs[0] = set_spec.substring(0, colon_index);
970 strs[1] = set_spec.substring(colon_index + 1);
971 return strs;
972 }
973 /** split the identifier into <site + collection + OID> as an array
974 It has already been checked that the 'identifier' contains at least two ':'
975 */
976 private String[] splitNames(String identifier) {
977 logger.info(identifier);
978 String [] strs = new String[3];
979 int first_colon = identifier.indexOf(":");
980 if(first_colon == -1) {
981 return null;
982 }
983 strs[0] = identifier.substring(0, first_colon);
984
985 String sr = identifier.substring(first_colon + 1);
986 int second_colon = sr.indexOf(":");
987 //logger.error(first_colon + " " + second_colon);
988 strs[1] = sr.substring(0, second_colon);
989
990 strs[2] = sr.substring(second_colon + 1);
991 return strs;
992 }
993 /** validate if the specified metadata prefix value is supported by the repository
994 * by checking it in the OAIConfig.xml
995 */
996 private boolean containsMetadataPrefix(String prefix_value) {
997 NodeList prefix_list = oai_config.getElementsByTagName(OAIXML.METADATA_PREFIX);
998
999 for(int i=0; i<prefix_list.getLength(); i++) {
1000 if(prefix_value.equals(GSXML.getNodeText((Element)prefix_list.item(i)).trim() )) {
1001 return true;
1002 }
1003 }
1004 return false;
1005 }
1006 private Element doGetRecord(Element msg){
1007 logger.info("");
1008 /** arguments:
1009 identifier: required
1010 metadataPrefix: required
1011 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
1012 */
1013 Element get_record = OAIXML.createElement(OAIXML.GET_RECORD);
1014
1015 HashSet<String> valid_strs = new HashSet<String>();
1016 valid_strs.add(OAIXML.IDENTIFIER);
1017 valid_strs.add(OAIXML.METADATA_PREFIX);
1018
1019 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
1020 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
1021 HashMap<String, String> param_map = OAIXML.getParamMap(params);
1022
1023 if(!isValidParam(param_map, valid_strs) ||
1024 params.getLength() == 0 ||
1025 param_map.containsKey(OAIXML.IDENTIFIER) == false ||
1026 param_map.containsKey(OAIXML.METADATA_PREFIX) == false ) {
1027 logger.error("must have the metadataPrefix/identifier parameter.");
1028 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
1029 }
1030
1031 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
1032 String identifier = param_map.get(OAIXML.IDENTIFIER);
1033
1034 // verify the metadata prefix
1035 if (containsMetadataPrefix(prefix) == false) {
1036 logger.error("requested prefix is not found in OAIConfig.xml");
1037 return getMessage(OAIXML.createErrorElement(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""));
1038 }
1039
1040 // get the names
1041 String[] strs = splitNames(identifier);
1042 if(strs == null || strs.length < 3) {
1043 logger.error("identifier is not in the form site:coll:id" + identifier);
1044 return getMessage(OAIXML.createErrorElement(OAIXML.ID_DOES_NOT_EXIST, ""));
1045 }
1046 String name_of_site = strs[0];
1047 String coll_name = strs[1];
1048 String oid = strs[2];
1049
1050 //re-organize the request element
1051 // reset the 'to' attribute
1052 String verb = req.getAttribute(OAIXML.TO);
1053 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
1054 // reset the identifier element
1055 Element param = GSXML.getNamedElement(req, OAIXML.PARAM, OAIXML.NAME, OAIXML.IDENTIFIER);
1056 if (param != null) {
1057 param.setAttribute(OAIXML.NAME, OAIXML.OID);
1058 param.setAttribute(OAIXML.VALUE, oid);
1059 }
1060
1061 //Now send the request to the message router to process
1062 Node result_node = mr.process(msg);
1063 return converter.nodeToElement(result_node);
1064 }
1065
1066 // See OAIConfig.xml
1067 // dynamically works out what the earliestDateStamp is, since it varies by collection
1068 // returns this time in *milliseconds*.
1069 protected long getEarliestDateStamp(NodeList oai_coll) {
1070 //do the earliestDatestamp
1071 long earliestDatestamp = System.currentTimeMillis();
1072 int oai_coll_size = oai_coll.getLength();
1073 if (oai_coll_size == 0) {
1074 logger.info("returned oai collection list is empty. Setting repository earliestDatestamp to be 1970-01-01.");
1075 earliestDatestamp = 0;
1076 }
1077 // the earliestDatestamp is now stored as a metadata element in the collection's buildConfig.xml file
1078 // we get the earliestDatestamp among the collections
1079 for(int i=0; i<oai_coll_size; i++) {
1080 long coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.EARLIEST_DATESTAMP));
1081 earliestDatestamp = (earliestDatestamp > coll_earliestDatestamp)? coll_earliestDatestamp : earliestDatestamp;
1082 }
1083
1084 return earliestDatestamp*1000; // converting from seconds to milliseconds
1085 }
1086}
Note: See TracBrowser for help on using the repository browser.