source: main/branches/64_bit_Greenstone/greenstone3/src/java/org/greenstone/gsdl3/core/OAIReceptionist.java@ 24007

Last change on this file since 24007 was 24007, checked in by sjm84, 13 years ago

Updating this branch to match the latest Greenstone3 changes

File size: 44.4 KB
Line 
1package org.greenstone.gsdl3.core;
2
3import org.greenstone.gsdl3.util.*;
4import org.greenstone.gsdl3.action.*;
5// XML classes
6import org.w3c.dom.Node;
7import org.w3c.dom.NodeList;
8import org.w3c.dom.Document;
9import org.w3c.dom.Element;
10
11// other java classes
12import java.io.File;
13import java.util.*;
14
15import org.apache.log4j.*;
16
/**
 * A Receptionist used for generating OAI metadata response XML.
 * This receptionist talks to the message router directly,
 * instead of via any action, hence no action map is needed.
 * @see Receptionist
 */
22public class OAIReceptionist implements ModuleInterface {
23
24 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.core.OAIReceptionist.class.getName());
25
26 /** Instead of a config_params object, only a site_name is needed by oai receptionist. */
27 protected String site_name = null;
28 /** container Document to create XML Nodes for requests sent to message router
29 * Not used for response
30 */
31 protected Document doc=null;
32
33 /** a converter class to parse XML and create Docs */
34 protected XMLConverter converter=null;
35
36 /** the configure file of this receptionist passed from the oai servlet. */
37 protected Element oai_config = null;
38
39 /** contained in the OAIConfig.xml deciding whether the resumptionToken should be in use */
40 protected int resume_after = -1 ;
41
42 /** the message router that the Receptionist and Actions will talk to */
43 protected ModuleInterface mr = null;
44
/** Creates the receptionist along with its XML converter and the DOM used for building requests. */
public OAIReceptionist() {
    XMLConverter xml_converter = new XMLConverter();
    this.converter = xml_converter;
    this.doc = xml_converter.newDOM();
}
50
51 public void cleanUp() {}
52
53 public void setSiteName(String site_name) {
54 this.site_name = site_name;
55 }
56 /** sets the message router - it should already be created and
57 * configured in the init() of a servlet (OAIServer, for example) before being passed to the receptionist*/
58 public void setMessageRouter(ModuleInterface mr) {
59 this.mr = mr;
60 }
61
62 /** configures the receptionist */
63 public boolean configure(Element config) {
64
65 if (this.mr==null) {
66 logger.error(" message routers must be set before calling oai configure");
67 return false;
68 }
69 if (config == null) {
70 logger.error(" oai configure file is null");
71 return false;
72 }
73 oai_config = config;
74 resume_after = getResumeAfter();
75
76 //clear out expired resumption tokens stored in OAIResumptionToken.xml
77 OAIXML.init();
78 OAIXML.clearExpiredTokens();
79
80 return true;
81 }
82 /** process using strings - just calls process using Elements */
83 public String process(String xml_in) {
84
85 Node message_node = this.converter.getDOM(xml_in);
86 Node page = process(message_node);
87 return this.converter.getString(page);
88 }
89
90 //Compose a message element used to send back to the OAIServer servlet.
91 //This method is only used within OAIReceptionist
92 private Element getMessage(Element e) {
93 Element msg = OAIXML.createElement(OAIXML.MESSAGE);
94 msg.appendChild(OAIXML.getResponse(e));
95 return msg;
96 }
97 /** process - produce xml data in response to a request
98 * if something goes wrong, it returns null -
99 */
100 public Node process(Node message_node) {
101 logger.error("OAIReceptionist received request");
102
103 Element message = this.converter.nodeToElement(message_node);
104 logger.error(this.converter.getString(message));
105
106 // check that its a correct message tag
107 if (!message.getTagName().equals(GSXML.MESSAGE_ELEM)) {
108 logger.error(" Invalid message. GSDL message should start with <"+GSXML.MESSAGE_ELEM+">, instead it starts with:"+message.getTagName()+".");
109 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
110 }
111
112 // get the request out of the message - assume that there is only one
113 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
114 if (request == null) {
115 logger.error(" message had no request!");
116 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
117 }
118 //At this stage, the value of 'to' attribute of the request must be the 'verb'
119 //The only thing that the oai receptionist can be sure is that these verbs are valid, nothing else.
120 String verb = request.getAttribute(GSXML.TO_ATT);
121 if (verb.equals(OAIXML.IDENTIFY)) {
122 return doIdentify();
123 }
124 if (verb.equals(OAIXML.LIST_METADATA_FORMATS)) {
125 return doListMetadataFormats(message);
126 }
127 if (verb.equals(OAIXML.LIST_SETS)) {
128 return doListSets(message);
129 }
130 if (verb.equals(OAIXML.GET_RECORD)) {
131 return doGetRecord(message);
132 }
133 if (verb.equals(OAIXML.LIST_IDENTIFIERS)) {
134 return doListIdentifiers(message);
135 }
136 if (verb.equals(OAIXML.LIST_RECORDS)) {
137 return doListRecords(message);
138 }
139 return getMessage(OAIXML.createErrorElement("Unexpected things happened", ""));
140
141 }
142 /** send a request to the message router asking for a list of collections that support oai
143 * The type attribute must be changed from 'oaiService' to 'oaiSetList'
144 */
145 private NodeList getOAICollectionList() {
146 Element message = this.doc.createElement(OAIXML.MESSAGE);
147 Element request = this.doc.createElement(OAIXML.REQUEST);
148 message.appendChild(request);
149 request.setAttribute(OAIXML.TYPE, OAIXML.OAI_SET_LIST);
150 request.setAttribute(OAIXML.TO, "");
151 Node msg_node = mr.process(message);
152
153 if (msg_node == null) {
154 logger.error("returned msg_node from mr is null");
155 return null;
156 }
157 Element resp = (Element)GSXML.getChildByTagName(msg_node, OAIXML.RESPONSE);
158 Element coll_list = (Element)GSXML.getChildByTagName(resp, OAIXML.COLLECTION_LIST);
159 if (coll_list == null) {
160 logger.error("coll_list is null");
161 return null;
162 }
163 //logger.info(GSXML.xmlNodeToString(coll_list));
164 NodeList list = coll_list.getElementsByTagName(OAIXML.COLLECTION);
165 int length = list.getLength();
166 if (length == 0) {
167 logger.error("length is 0");
168 return null;
169 }
170 return list;
171 }
172 /**Exclusively called by doListSets()*/
173 private void getSets(Element list_sets_elem, NodeList oai_coll, int start_point, int end_point) {
174 for (int i=start_point; i<end_point; i++) {
175 String coll_spec = ((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME);
176 String coll_name = coll_spec.substring(coll_spec.indexOf(":") + 1);
177 Element set = OAIXML.createElement(OAIXML.SET);
178 Element set_spec = OAIXML.createElement(OAIXML.SET_SPEC);
179 GSXML.setNodeText(set_spec, coll_spec);
180 set.appendChild(set_spec);
181 Element set_name = OAIXML.createElement(OAIXML.SET_NAME);
182 GSXML.setNodeText(set_name, coll_name);
183 set.appendChild(set_name);
184 list_sets_elem.appendChild(set);
185 }
186 }
187 private int getResumeAfter() {
188 Element resume_after = (Element)GSXML.getChildByTagName(oai_config, OAIXML.RESUME_AFTER);
189 if(resume_after != null) return Integer.parseInt(GSXML.getNodeText(resume_after));
190 return -1;
191 }
192 /** method to compose a set element
193 */
194 private Element doListSets(Element msg){
195 logger.info("");
196 // option: resumptionToken
197 // exceptions: badArgument, badResumptionToken, noSetHierarchy
198 Element list_sets_elem = OAIXML.createElement(OAIXML.LIST_SETS);
199
200 //ask the message router for a list of oai collections
201 NodeList oai_coll = getOAICollectionList();
202 int oai_coll_size = oai_coll.getLength();
203 if (oai_coll_size == 0) {
204 return getMessage(list_sets_elem);
205 }
206
207 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
208 if (req == null) {
209 logger.error("req is null");
210 return null;
211 }
212 //params list only contains the parameters other than the verb
213 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
214 Element param = null;
215 int smaller = (oai_coll_size>resume_after)? resume_after : oai_coll_size;
216 if (params.getLength() > 1) {
217 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
218 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
219 }
220 if(params.getLength() == 0) {
221 //this is requesting a list of sets in the whole repository
222 /** there is no resumeptionToken in the request, we check whether we need
223 * to send out resumeptionToken by comparing the total number of sets in this
224 * repository and the specified value of resumeAfter
225 */
226 if(resume_after < 0 || oai_coll_size <= resume_after) {
227 //send the whole list of records
228 //all data are sent on the first request. Therefore there should be
229 //no resumeptionToken stored in OAIConfig.xml.
230 //As long as the verb is 'ListSets', we ignore the rest of the parameters
231 getSets(list_sets_elem, oai_coll, 0, oai_coll_size);
232 return getMessage(list_sets_elem);
233 }
234
235 //append required sets to list_sets_elem (may be a complete or incomplete list)
236 getSets(list_sets_elem, oai_coll, 0, smaller);
237
238 if(oai_coll_size > resume_after) {
239 //An incomplete list is sent; append a resumptionToken element
240 Element token = createResumptionTokenElement(oai_coll_size, 0, resume_after, true);
241 //store this token
242 OAIXML.addToken(token);
243
244 list_sets_elem.appendChild(token);
245 }
246
247 return getMessage(list_sets_elem);
248 }
249
250 // The url should contain only one param called resumptionToken
251 // This is requesting a subsequent part of a list, following a previously sent incomplete list
252 param = (Element)params.item(0);
253 String param_name = param.getAttribute(OAIXML.NAME);
254 if (!param_name.equals(OAIXML.RESUMPTION_TOKEN)) {
255 //Bad argument
256 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
257 }
258 //get the token
259 String token = param.getAttribute(OAIXML.VALUE);
260 //validate the token string (the string has already been decoded in OAIServer, e.g.,
261 // replace %3A with ':')
262 if(OAIXML.containsToken(token) == false) {
263 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_RESUMPTION_TOKEN, ""));
264 }
265 //take out the cursor value, which is the size of previously sent list
266 int index = token.indexOf(":");
267 int cursor = Integer.parseInt(token.substring(index + 1));
268 Element token_elem = null;
269
270 // are we sending the final part of a complete list?
271 if(cursor + resume_after >= oai_coll_size) {
272 //Yes, we are.
273 //append required sets to list_sets_elem (list is complete)
274 getSets(list_sets_elem, oai_coll, cursor, oai_coll_size);
275 //An incomplete list is sent; append a resumptionToken element
276 token_elem = createResumptionTokenElement(oai_coll_size, cursor, -1, false);
277 list_sets_elem.appendChild(token_elem);
278 } else {
279 //No, we are not.
280 //append required sets to list_sets_elem (list is incomplete)
281 getSets(list_sets_elem, oai_coll, cursor, cursor + resume_after);
282 token_elem = createResumptionTokenElement(oai_coll_size, cursor, cursor + resume_after, true);
283 //store this token
284 OAIXML.addToken(token_elem);
285 list_sets_elem.appendChild(token_elem);
286 }
287 return getMessage(list_sets_elem);
288 }
289 private Element createResumptionTokenElement(int total_size, int cursor, int so_far_sent, boolean set_expiration) {
290 Element token = OAIXML.createElement(OAIXML.RESUMPTION_TOKEN);
291 token.setAttribute(OAIXML.COMPLETE_LIST_SIZE, "" + total_size);
292 token.setAttribute(OAIXML.CURSOR, "" + cursor);
293
294 if(set_expiration) {
295 /** read the resumptionTokenExpiration element in OAIConfig.xml and get the specified time value
296 * Use the time value plus the current system time to get the expiration date string.
297 */
298 String expiration_date = OAIXML.getTime(System.currentTimeMillis() + OAIXML.getTokenExpiration()); // in milliseconds
299 token.setAttribute(OAIXML.EXPIRATION_DATE, expiration_date);
300 }
301
302 if(so_far_sent > 0) {
303 //the format of resumptionToken is not defined by the OAI-PMH and should be
304 //considered opaque by the harvester (in other words, strictly follow what the
305 //data provider has to offer
306 //Here, we make use of the uniqueness of the system time
307 GSXML.setNodeText(token, OAIXML.GS3OAI + System.currentTimeMillis() + ":" + so_far_sent);
308 }
309 return token;
310 }
311 /** if the param_map contains strings other than those in valid_strs, return false;
312 * otherwise true.
313 */
314 private boolean isValidParam(HashMap param_map, HashSet valid_strs) {
315 ArrayList param_list = new ArrayList(param_map.keySet());
316 for(int i=0; i<param_list.size(); i++) {
317 if (valid_strs.contains((String)param_list.get(i)) == false) {
318 return false;
319 }
320 }
321 return true;
322 }
323 private Element doListIdentifiers(Element msg) {
324 // option: from, until, set, metadataPrefix, resumptionToken
325 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
326 HashSet valid_strs = new HashSet();
327 valid_strs.add(OAIXML.FROM);
328 valid_strs.add(OAIXML.UNTIL);
329 valid_strs.add(OAIXML.SET);
330 valid_strs.add(OAIXML.METADATA_PREFIX);
331 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
332
333 Element list_identifiers = OAIXML.createElement(OAIXML.LIST_IDENTIFIERS);
334 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
335 if (req == null) { logger.error("req is null"); return null; }
336 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
337 String coll_name = "";
338 String token = "";
339
340 HashMap param_map = OAIXML.getParamMap(params);
341 if (!isValidParam(param_map, valid_strs)) {
342 logger.error("One of the params is invalid");
343 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
344 }
345 // param keys are valid, but if there are any date params, check they're of the right format
346 String from = (String)param_map.get(OAIXML.FROM);
347 if(from != null) {
348 Date from_date = OAIXML.getDate(from);
349 if(from_date == null) {
350 logger.error("invalid date: " + from);
351 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
352 }
353 }
354 String until = (String)param_map.get(OAIXML.UNTIL);
355 if(until != null) {
356 Date until_date = OAIXML.getDate(until);
357 if(until_date == null) {
358 logger.error("invalid date: " + until);
359 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
360 }
361 }
362 if(from != null && until != null) { // check they are of the same date-time format (granularity)
363 if(from.length() != until.length()) {
364 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
365 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
366 }
367 }
368
369 //ask the message router for a list of oai collections
370 NodeList oai_coll = getOAICollectionList();
371 int oai_coll_size = oai_coll.getLength();
372 if (oai_coll_size == 0) {
373 logger.info("returned oai collection list is empty");
374 return getMessage(OAIXML.createErrorElement(OAIXML.NO_RECORDS_MATCH, ""));
375 }
376
377 //Now we check if the optional argument 'set' has been specified in the params; if so,
378 //whether the specified setSpec is supported by this repository
379 boolean request_set = param_map.containsKey(OAIXML.SET);
380 if(request_set == true) {
381 boolean set_supported = false;
382 String set_spec_str = (String)param_map.get(OAIXML.SET);
383 // get the collection name
384 //if setSpec is supported by this repository, it must be in the form: site_name:coll_name
385 String[] strs = splitSetSpec(set_spec_str);
386 coll_name = strs[1];
387
388 for(int i=0; i<oai_coll_size; i++) {
389 if(set_spec_str.equals(((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME))) {
390 set_supported = true;
391 }
392 }
393 if(set_supported == false) {
394 logger.error("requested set is not found in this repository");
395 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
396 }
397 }
398
399 //Is there a resumptionToken included which is requesting an incomplete list?
400 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
401 // validate resumptionToken
402 token = (String)param_map.get(OAIXML.RESUMPTION_TOKEN);
403 logger.info("has resumptionToken" + token);
404 if(OAIXML.containsToken(token) == false) {
405 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_RESUMPTION_TOKEN, ""));
406 }
407 }
408
409 // Custom test that expects a metadataPrefix comes here at end so that the official params can
410 // be tested first for errors and their error responses sent off. Required for OAI validation
411 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
412 logger.error("contains invalid params or no metadataPrefix");
413 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
414 }
415
416 //Now that we got a prefix, check and see if it's supported by this repository
417 String prefix_value = (String)param_map.get(OAIXML.METADATA_PREFIX);
418 if (containsMetadataPrefix(prefix_value) == false) {
419 logger.error("requested prefix is not found in OAIConfig.xml");
420 return getMessage(OAIXML.createErrorElement(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""));
421 }
422
423 //Now that all validation has been done, I hope, we can send request to the message router
424 Element result = null;
425 String verb = req.getAttribute(OAIXML.TO);
426 NodeList param_list = req.getElementsByTagName(OAIXML.PARAM);
427 ArrayList retain_param_list = new ArrayList();
428 for (int j=0; j<param_list.getLength(); j++) {
429 Element e = OAIXML.duplicateElement(msg.getOwnerDocument(), (Element)param_list.item(j), true);
430 retain_param_list.add(e);
431 }
432
433 //re-organize the request element
434 // reset the 'to' attribute
435 if (request_set == false) {
436 logger.info("requesting identifiers of all collections");
437 for(int i=0; i<oai_coll_size; i++) {
438 if(req == null) {
439 req = msg.getOwnerDocument().createElement(GSXML.REQUEST_ELEM);
440 msg.appendChild(req);
441 for (int j=0; j<retain_param_list.size(); j++) {
442 req.appendChild((Element)retain_param_list.get(j));
443 }
444 }
445 String full_name = ((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME);
446 coll_name = full_name.substring(full_name.indexOf(":") + 1);
447 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
448 Node n = mr.process(msg);
449 Element e = converter.nodeToElement(n);
450 result = collectAll(result, e, verb, OAIXML.HEADER);
451
452 //clear the content of the old request element
453 msg.removeChild(req);
454 req = null;
455 }
456 } else {
457 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
458 Node result_node = mr.process(msg);
459 result = converter.nodeToElement(result_node);
460 }
461
462 if (result == null) {
463 logger.info("message router returns null");
464 return getMessage(OAIXML.createErrorElement("Internal service returns null", ""));
465 }
466 Element res = (Element)GSXML.getChildByTagName(result, OAIXML.RESPONSE);
467 if(res == null) {
468 logger.info("response element in xml_result is null");
469 return getMessage(OAIXML.createErrorElement("Internal service returns null", ""));
470 }
471 NodeList header_list = res.getElementsByTagName(OAIXML.HEADER);
472 int num_headers = header_list.getLength();
473 if(num_headers == 0) {
474 logger.info("message router returns 0 headers.");
475 return getMessage(OAIXML.createErrorElement(OAIXML.NO_RECORDS_MATCH, ""));
476 }
477
478 //The request coming in does not contain a token, but we have to check the resume_after value and see if we need to issue a resumption token and
479 // save the token as well.
480 if (token.equals("") == true) {
481 if(resume_after < 0 || num_headers <= resume_after) {
482 //send the whole list of records
483 return result;
484 }
485
486 //append required number of records (may be a complete or incomplete list)
487 getRecords(list_identifiers, header_list, 0, resume_after);
488 //An incomplete list is sent; append a resumptionToken element
489 Element token_elem = createResumptionTokenElement(num_headers, 0, resume_after, true);
490 //store this token
491 OAIXML.addToken(token_elem);
492
493 list_identifiers.appendChild(token_elem);
494 return getMessage(list_identifiers);
495 }
496
497 if (token.equals("") == false) {
498 //get an appropriate number of records (partial list) according to the token
499 //take out the cursor value, which is the size of previously sent list
500 int index = token.indexOf(":");
501 int cursor = Integer.parseInt(token.substring(index + 1));
502 Element token_elem = null;
503
504 // are we sending the final part of a complete list?
505 if(cursor + resume_after >= num_headers) {
506 //Yes, we are.
507 //append required records to list_records (list is complete)
508 getRecords(list_identifiers, header_list, cursor, num_headers);
509 //An incomplete list is sent; append a resumptionToken element
510 token_elem = createResumptionTokenElement(num_headers, cursor, -1, false);
511 list_identifiers.appendChild(token_elem);
512 } else {
513 //No, we are not.
514 //append required records to list_records (list is incomplete)
515 getRecords(list_identifiers, header_list, cursor, cursor + resume_after);
516 token_elem = createResumptionTokenElement(num_headers, cursor, cursor + resume_after, true);
517 //store this token
518 OAIXML.addToken(token_elem);
519 list_identifiers.appendChild(token_elem);
520 }
521
522 return getMessage(list_identifiers);
523 }//end of if(!token.equals(""))
524
525 return result;
526 }
527 private Element doListRecords(Element msg){
528 logger.info("");
529 // option: from, until, set, metadataPrefix, and resumptionToken
530 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
531 HashSet valid_strs = new HashSet();
532 valid_strs.add(OAIXML.FROM);
533 valid_strs.add(OAIXML.UNTIL);
534 valid_strs.add(OAIXML.SET);
535 valid_strs.add(OAIXML.METADATA_PREFIX);
536 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
537
538 Element list_records = OAIXML.createElement(OAIXML.LIST_RECORDS);
539 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
540 if (req == null) { logger.error("req is null"); return null; }
541 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
542
543 String coll_name = "";
544 String token = "";
545
546 if(params.getLength() == 0) {
547 logger.error("must at least have the metadataPrefix parameter, can't be none");
548 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
549 }
550
551 HashMap param_map = OAIXML.getParamMap(params);
552 if (!isValidParam(param_map, valid_strs)) {
553 logger.error("One of the params is invalid");
554 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
555 }
556 // param keys are valid, but if there are any date params, check they're of the right format
557 String from = (String)param_map.get(OAIXML.FROM);
558 if(from != null) {
559 Date from_date = OAIXML.getDate(from);
560 if(from_date == null) {
561 logger.error("invalid date: " + from);
562 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
563 }
564 }
565 String until = (String)param_map.get(OAIXML.UNTIL);
566 Date until_date = null;
567 if(until != null) {
568 until_date = OAIXML.getDate(until);
569 if(until_date == null) {
570 logger.error("invalid date: " + until);
571 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
572 }
573 }
574 if(from != null && until != null) { // check they are of the same date-time format (granularity)
575 if(from.length() != until.length()) {
576 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
577 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
578 }
579 }
580
581 //ask the message router for a list of oai collections
582 NodeList oai_coll = getOAICollectionList();
583 int oai_coll_size = oai_coll.getLength();
584 if (oai_coll_size == 0) {
585 logger.info("returned oai collection list is empty");
586 return getMessage(OAIXML.createErrorElement(OAIXML.NO_RECORDS_MATCH, ""));
587 }
588
589 //Now we check if the optional argument 'set' has been specified in the params; if so,
590 //whether the specified setSpec is supported by this repository
591 boolean request_set = param_map.containsKey(OAIXML.SET);
592 if(request_set == true) {
593 boolean set_supported = false;
594 String set_spec_str = (String)param_map.get(OAIXML.SET);
595 // get the collection name
596 //if setSpec is supported by this repository, it must be in the form: site_name:coll_name
597 String[] strs = splitSetSpec(set_spec_str);
598// name_of_site = strs[0];
599 coll_name = strs[1];
600 //logger.info("param contains set: "+coll_name);
601
602 for(int i=0; i<oai_coll_size; i++) {
603 if(set_spec_str.equals(((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME))) {
604 set_supported = true;
605 }
606 }
607 if(set_supported == false) {
608 logger.error("requested set is not found in this repository");
609 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
610 }
611 }
612
613 //Is there a resumptionToken included which is requesting an incomplete list?
614 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
615 // validate resumptionToken
616 //if (the token value is not found in the token xml file) {
617 // return getMessage(OAIXML.createErrorElement(OAIXML.BAD_RESUMPTION_TOKEN, ""));
618 //} else {
619 // use the request to get a complete list of records from the message router
620 // and issue the subsequent part of that complete list according to the token.
621 // store a new token if necessary.
622 //}
623 token = (String)param_map.get(OAIXML.RESUMPTION_TOKEN);
624 logger.info("has resumptionToken" + token);
625 if(OAIXML.containsToken(token) == false) {
626 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_RESUMPTION_TOKEN, ""));
627 }
628 }
629
630 // Moved the additional custom test that mandates the metadataPrefix here, since official
631 // errors should be caught first, so that their error responses can be sent off first
632 // such that GS2's oaiserver will validate properly.
633 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
634 // it must have a metadataPrefix
635 /** Here I disagree with the OAI specification: even if a resumptionToken is
636 * included in the request, the metadataPrefix is a must argument. Otherwise
637 * how would we know what metadataPrefix the harvester requested in his last request?
638 */
639 logger.error("no metadataPrefix");
640 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
641 }
642
643 //Now that we got a prefix, check and see if it's supported by this repository
644 String prefix_value = (String)param_map.get(OAIXML.METADATA_PREFIX);
645 if (containsMetadataPrefix(prefix_value) == false) {
646 logger.error("requested prefix is not found in OAIConfig.xml");
647 return getMessage(OAIXML.createErrorElement(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""));
648 }
649
650 //Now that all validation has been done, I hope, we can send request to the message router
651 Element result = null;
652 String verb = req.getAttribute(OAIXML.TO);
653 NodeList param_list = req.getElementsByTagName(OAIXML.PARAM);
654 ArrayList retain_param_list = new ArrayList();
655 for (int j=0; j<param_list.getLength(); j++) {
656 Element e = OAIXML.duplicateElement(msg.getOwnerDocument(), (Element)param_list.item(j), true);
657 retain_param_list.add(e);
658 }
659
660 //re-organize the request element
661 // reset the 'to' attribute
662 if (request_set == false) {
663 //coll_name could be "", which means it's requesting all records of all collections
664 //we send a request to each collection asking for its records
665 for(int i=0; i<oai_coll_size; i++) {
666 if(req == null) {
667 req = msg.getOwnerDocument().createElement(GSXML.REQUEST_ELEM);
668 msg.appendChild(req);
669 for (int j=0; j<retain_param_list.size(); j++) {
670 req.appendChild((Element)retain_param_list.get(j));
671 }
672 }
673 String full_name = ((Element)oai_coll.item(i)).getAttribute(OAIXML.NAME);
674 coll_name = full_name.substring(full_name.indexOf(":") + 1);
675 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
676 //logger.info(GSXML.xmlNodeToString(req));
677 Node n = mr.process(msg);
678 Element e = converter.nodeToElement(n);
679 result = collectAll(result, e, verb, OAIXML.RECORD);
680
681 //clear the content of the old request element
682 msg.removeChild(req);
683 req = null;
684 }
685 } else {
686 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
687
688 Node result_node = mr.process(msg);
689 result = converter.nodeToElement(result_node);
690 }
691
692 if (result == null) {
693 logger.info("message router returns null");
694 return getMessage(OAIXML.createErrorElement("Internal service returns null", ""));
695 }
696 Element res = (Element)GSXML.getChildByTagName(result, OAIXML.RESPONSE);
697 if(res == null) {
698 logger.info("response element in xml_result is null");
699 return getMessage(OAIXML.createErrorElement("Internal service returns null", ""));
700 }
701 NodeList record_list = res.getElementsByTagName(OAIXML.RECORD);
702 int num_records = record_list.getLength();
703 if(num_records == 0) {
704 logger.info("message router returns 0 records.");
705 return getMessage(OAIXML.createErrorElement(OAIXML.NO_RECORDS_MATCH, ""));
706 }
707
708 //The request coming in does not contain a token, but we have to check the resume_after value and see if we need to issue a resumption token and
709 // save the token as well.
710 if (token.equals("") == true) {
711 if(resume_after < 0 || num_records <= resume_after) {
712 //send the whole list of records
713 return result;
714 }
715
716 //append required number of records (may be a complete or incomplete list)
717 getRecords(list_records, record_list, 0, resume_after);
718 //An incomplete list is sent; append a resumptionToken element
719 Element token_elem = createResumptionTokenElement(num_records, 0, resume_after, true);
720 //store this token
721 OAIXML.addToken(token_elem);
722
723 list_records.appendChild(token_elem);
724 return getMessage(list_records);
725 }
726
727 if (token.equals("") == false) {
728 //get an appropriate number of records (partial list) according to the token
729 //take out the cursor value, which is the size of previously sent list
730 int index = token.indexOf(":");
731 int cursor = Integer.parseInt(token.substring(index + 1));
732 Element token_elem = null;
733
734 // are we sending the final part of a complete list?
735 if(cursor + resume_after >= num_records) {
736 //Yes, we are.
737 //append required records to list_records (list is complete)
738 getRecords(list_records, record_list, cursor, num_records);
739 //An incomplete list is sent; append a resumptionToken element
740 token_elem = createResumptionTokenElement(num_records, cursor, -1, false);
741 list_records.appendChild(token_elem);
742 } else {
743 //No, we are not.
744 //append required records to list_records (list is incomplete)
745 getRecords(list_records, record_list, cursor, cursor + resume_after);
746 token_elem = createResumptionTokenElement(num_records, cursor, cursor + resume_after, true);
747 //store this token
748 OAIXML.addToken(token_elem);
749 list_records.appendChild(token_elem);
750 }
751
752 return getMessage(list_records);
753 }//end of if(!token.equals(""))
754
755 return result;//a backup return
756 }
757 // method exclusively used by doListRecords/doListIdentifiers
758 private void getRecords(Element verb_elem, NodeList list, int start_point, int end_point) {
759 for (int i=start_point; i<end_point; i++) {
760 verb_elem.appendChild(verb_elem.getOwnerDocument().importNode(list.item(i), true));
761 }
762 }
763 private Element collectAll(Element result, Element msg, String verb, String elem_name) {
764 if(result == null) {
765 //in the first round, result is null
766 return msg;
767 }
768 Element res_in_result = (Element)GSXML.getChildByTagName(result, OAIXML.RESPONSE);
769 if(res_in_result == null) { // return the results of all other collections accumulated so far
770 return msg;
771 }
772 Element verb_elem = (Element)GSXML.getChildByTagName(res_in_result, verb);
773 if(msg == null) {
774 return result;
775 }
776
777 //e.g., get all <record> elements from the returned message. There may be none of
778 //such element, for example, the collection service returned an error message
779 NodeList elem_list = msg.getElementsByTagName(elem_name);
780
781 for (int i=0; i<elem_list.getLength(); i++) {
782 verb_elem.appendChild(res_in_result.getOwnerDocument().importNode(elem_list.item(i), true));
783 }
784 return result;
785 }
786 /** there are three possible exception conditions: bad argument, idDoesNotExist, and noMetadataFormats.
787 * The first one is handled here, and the last two are processed by OAIPMH.
788 */
789 private Element doListMetadataFormats(Element msg) {
790 //if the verb is ListMetadataFormats, there could be only one parameter: identifier
791 //, or there is no parameter; otherwise it is an error
792 //logger.info("" + this.converter.getString(msg));
793
794 Element list_metadata_formats = OAIXML.createElement(OAIXML.LIST_METADATA_FORMATS);
795
796 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
797 if (req == null) { logger.error(""); return null; }
798 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
799 Element param = null;
800 if(params.getLength() == 0) {
801 //this is requesting metadata formats for the whole repository
802 //read the oaiConfig.xml file, return the metadata formats specified there.
803 Element oai_config = OAIXML.getOAIConfigXML();
804 if (oai_config == null) {
805 return getMessage(OAIXML.createErrorElement(OAIXML.ERROR, OAIXML.SERVICE_UNAVAILABLE));
806 } else {
807 Element format_list = (Element)GSXML.getChildByTagName(oai_config, OAIXML.LIST_METADATA_FORMATS);
808 if(format_list == null) {
809 logger.error("OAIConfig.xml must contain the supported metadata formats");
810 return getMessage(list_metadata_formats);
811 }
812 NodeList formats = format_list.getElementsByTagName(OAIXML.METADATA_FORMAT);
813 for(int i=0; i<formats.getLength(); i++) {
814 Element meta_fmt = OAIXML.createElement(OAIXML.METADATA_FORMAT);
815 Element first_meta_format = (Element)formats.item(i);
816 //the element also contains mappings, but we don't want them
817 meta_fmt.appendChild(meta_fmt.getOwnerDocument().importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_PREFIX), true));
818 meta_fmt.appendChild(meta_fmt.getOwnerDocument().importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.SCHEMA), true));
819 meta_fmt.appendChild(meta_fmt.getOwnerDocument().importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_NAMESPACE), true));
820 list_metadata_formats.appendChild(meta_fmt);
821 }
822 return getMessage(list_metadata_formats);
823 }
824
825 } else if (params.getLength() > 1) {
826 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
827 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
828 } else {
829 // This is a request for the metadata of a particular item with an identifier
830 /**the request xml is in the form: <request>
831 * <param name=.../>
832 * </request>
833 *And there is a param element and one element only. (No paramList element in between).
834 */
835 param = (Element)params.item(0);
836 String param_name = param.getAttribute(OAIXML.NAME);
837 String identifier = "";
838 if (!param_name.equals(OAIXML.IDENTIFIER)) {
839 //Bad argument
840 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
841 } else {
842 identifier = param.getAttribute(OAIXML.VALUE);
843 // the identifier is in the form: <site_name>:<coll_name>:<OID>
844 // so it must contain at least two ':' characters
845 String[] strs = identifier.split(":");
846 if(strs == null || strs.length < 3) {
847 // the OID may also contain ':'
848 logger.error("identifier is not in the form site:coll:id" + identifier);
849 return getMessage(OAIXML.createErrorElement(OAIXML.ID_DOES_NOT_EXIST, ""));
850 }
851
852 // send request to message router
853 // get the names
854 strs = splitNames(identifier);
855 if(strs == null || strs.length < 3) {
856 logger.error("identifier is not in the form site:coll:id" + identifier);
857 return getMessage(OAIXML.createErrorElement(OAIXML.ID_DOES_NOT_EXIST, ""));
858 }
859 String name_of_site = strs[0];
860 String coll_name = strs[1];
861 String oid = strs[2];
862
863 //re-organize the request element
864 // reset the 'to' attribute
865 String verb = req.getAttribute(OAIXML.TO);
866 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
867 // reset the identifier element
868 param.setAttribute(OAIXML.NAME, OAIXML.OID);
869 param.setAttribute(OAIXML.VALUE, oid);
870
871 //Now send the request to the message router to process
872 Node result_node = mr.process(msg);
873 return converter.nodeToElement(result_node);
874 }
875 }
876
877 }
878 private void appendParam(Element req, String name, String value) {
879 Element param = req.getOwnerDocument().createElement(OAIXML.PARAM);
880 param.setAttribute(OAIXML.NAME, name);
881 param.setAttribute(OAIXML.VALUE, value);
882 req.appendChild(param);
883 }
884 private void copyElement(Element identify, String tag_name) {
885 Element from_repository_name = (Element)GSXML.getChildByTagName(oai_config, tag_name);
886 if(from_repository_name != null) {
887 Element this_repository_name = OAIXML.createElement(tag_name);
888 GSXML.setNodeText(this_repository_name, GSXML.getNodeText(from_repository_name));
889 identify.appendChild(this_repository_name);
890 }
891 }
892 private Element doIdentify() {
893 //The validation for this verb has been done in OAIServer.validate(). So no bother here.
894 logger.info("");
895
896 Element identify = OAIXML.createElement(OAIXML.IDENTIFY);
897 //do the repository name
898 copyElement(identify, OAIXML.REPOSITORY_NAME);
899 //do the baseurl
900 copyElement(identify, OAIXML.BASE_URL);
901 //do the protocol version
902 copyElement(identify, OAIXML.PROTOCOL_VERSION);
903
904 //There can be more than one admin email according to the OAI specification
905 NodeList admin_emails = GSXML.getChildrenByTagName(oai_config, OAIXML.ADMIN_EMAIL);
906 int num_admin = 0;
907 Element from_admin_email = null;
908 if (admin_emails != null) {
909 num_admin = admin_emails.getLength();
910 }
911 for (int i=0; i<num_admin; i++) {
912 copyElement(identify, OAIXML.ADMIN_EMAIL);
913 }
914
915 //do the earliestDatestamp
916 //send request to mr to search through the earliest datestamp amongst all oai collections in the repository.
917 //ask the message router for a list of oai collections
918 NodeList oai_coll = getOAICollectionList();
919 long earliestDatestamp = getEarliestDateStamp(oai_coll);
920 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
921 Element earliestDatestamp_elem = OAIXML.createElement(OAIXML.EARLIEST_DATESTAMP);
922 GSXML.setNodeText(earliestDatestamp_elem, earliestDatestamp_str);
923 identify.appendChild(earliestDatestamp_elem);
924
925 //do the deletedRecord
926 copyElement(identify, OAIXML.DELETED_RECORD);
927 //do the granularity
928 copyElement(identify, OAIXML.GRANULARITY);
929
930 return getMessage(identify);
931 }
932 //split setSpec (site_name:coll_name) into an array of strings
933 //It has already been checked that the set_spec contains at least one ':'
934 private String[] splitSetSpec(String set_spec) {
935 logger.info(set_spec);
936 String[] strs = new String[2];
937 int colon_index = set_spec.indexOf(":");
938 strs[0] = set_spec.substring(0, colon_index);
939 strs[1] = set_spec.substring(colon_index + 1);
940 return strs;
941 }
942 /** split the identifier into <site + collection + OID> as an array
943 It has already been checked that the 'identifier' contains at least two ':'
944 */
945 private String[] splitNames(String identifier) {
946 logger.info(identifier);
947 String [] strs = new String[3];
948 int first_colon = identifier.indexOf(":");
949 if(first_colon == -1) {
950 return null;
951 }
952 strs[0] = identifier.substring(0, first_colon);
953
954 String sr = identifier.substring(first_colon + 1);
955 int second_colon = sr.indexOf(":");
956 //logger.error(first_colon + " " + second_colon);
957 strs[1] = sr.substring(0, second_colon);
958
959 strs[2] = sr.substring(second_colon + 1);
960 return strs;
961 }
962 /** validate if the specified metadata prefix value is supported by the repository
963 * by checking it in the OAIConfig.xml
964 */
965 private boolean containsMetadataPrefix(String prefix_value) {
966 NodeList prefix_list = oai_config.getElementsByTagName(OAIXML.METADATA_PREFIX);
967
968 for(int i=0; i<prefix_list.getLength(); i++) {
969 if(prefix_value.equals(GSXML.getNodeText((Element)prefix_list.item(i)).trim() )) {
970 return true;
971 }
972 }
973 return false;
974 }
975 private Element doGetRecord(Element msg){
976 logger.info("");
977 /** arguments:
978 identifier: required
979 metadataPrefix: required
980 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
981 */
982 Element get_record = OAIXML.createElement(OAIXML.GET_RECORD);
983
984 HashSet valid_strs = new HashSet();
985 valid_strs.add(OAIXML.IDENTIFIER);
986 valid_strs.add(OAIXML.METADATA_PREFIX);
987
988 Element req = (Element)GSXML.getChildByTagName(msg, GSXML.REQUEST_ELEM);
989 NodeList params = GSXML.getChildrenByTagName(req, OAIXML.PARAM);
990 HashMap param_map = OAIXML.getParamMap(params);
991
992 if(!isValidParam(param_map, valid_strs) ||
993 params.getLength() == 0 ||
994 param_map.containsKey(OAIXML.IDENTIFIER) == false ||
995 param_map.containsKey(OAIXML.METADATA_PREFIX) == false ) {
996 logger.error("must have the metadataPrefix/identifier parameter.");
997 return getMessage(OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, ""));
998 }
999
1000 String prefix = (String)param_map.get(OAIXML.METADATA_PREFIX);
1001 String identifier = (String)param_map.get(OAIXML.IDENTIFIER);
1002
1003 // verify the metadata prefix
1004 if (containsMetadataPrefix(prefix) == false) {
1005 logger.error("requested prefix is not found in OAIConfig.xml");
1006 return getMessage(OAIXML.createErrorElement(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""));
1007 }
1008
1009 // get the names
1010 String[] strs = splitNames(identifier);
1011 if(strs == null || strs.length < 3) {
1012 logger.error("identifier is not in the form site:coll:id" + identifier);
1013 return getMessage(OAIXML.createErrorElement(OAIXML.ID_DOES_NOT_EXIST, ""));
1014 }
1015 String name_of_site = strs[0];
1016 String coll_name = strs[1];
1017 String oid = strs[2];
1018
1019 //re-organize the request element
1020 // reset the 'to' attribute
1021 String verb = req.getAttribute(OAIXML.TO);
1022 req.setAttribute(OAIXML.TO, coll_name + "/" + verb);
1023 // reset the identifier element
1024 Element param = GSXML.getNamedElement(req, OAIXML.PARAM, OAIXML.NAME, OAIXML.IDENTIFIER);
1025 if (param != null) {
1026 param.setAttribute(OAIXML.NAME, OAIXML.OID);
1027 param.setAttribute(OAIXML.VALUE, oid);
1028 }
1029
1030 //Now send the request to the message router to process
1031 Node result_node = mr.process(msg);
1032 return converter.nodeToElement(result_node);
1033 }
1034
1035 // See OAIConfig.xml
1036 // dynamically works out what the earliestDateStamp is, since it varies by collection
1037 // returns this time in *milliseconds*.
1038 protected long getEarliestDateStamp(NodeList oai_coll) {
1039 //do the earliestDatestamp
1040 long earliestDatestamp = System.currentTimeMillis();
1041 int oai_coll_size = oai_coll.getLength();
1042 if (oai_coll_size == 0) {
1043 logger.info("returned oai collection list is empty. Setting repository earliestDatestamp to be 1970-01-01.");
1044 earliestDatestamp = 0;
1045 }
1046 // the earliestDatestamp is now stored as a metadata element in the collection's buildConfig.xml file
1047 // we get the earliestDatestamp among the collections
1048 for(int i=0; i<oai_coll_size; i++) {
1049 long coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.EARLIEST_DATESTAMP));
1050 earliestDatestamp = (earliestDatestamp > coll_earliestDatestamp)? coll_earliestDatestamp : earliestDatestamp;
1051 }
1052
1053 return earliestDatestamp*1000; // converting from seconds to milliseconds
1054 }
1055}
Note: See TracBrowser for help on using the repository browser.