source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/core/OAIReceptionist.java@ 28857

Last change on this file since 28857 was 28857, checked in by kjdon, 10 years ago

removed this.doc as DOM is not thread safe. adding oai super set handling. set list is created during configure as it is static. this means that if you add a new collection you need to restart the server. Merged methods for listIdentifiers and ListRecords as the code is pretty much identical. better resumption token handling. big code tidy up.

File size: 39.4 KB
Line 
1/*
2 * OAIReceptionist.java
3 * Copyright (C) 2012 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20package org.greenstone.gsdl3.core;
21
22import org.greenstone.gsdl3.util.*;
23import org.greenstone.gsdl3.action.*;
24// XML classes
25import org.w3c.dom.Node;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29
30// other java classes
31import java.io.File;
32import java.util.*;
33
34import org.apache.log4j.*;
35
36/** a Receptionist, used for oai metadata response xml generation.
37 * This receptionist talks to the message router directly,
38 * instead of via any action, hence no action map is needed.
39 * @see the basic Receptionist
40 */
41public class OAIReceptionist implements ModuleInterface {
42
43 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.core.OAIReceptionist.class.getName());
44
45 /** Instead of a config_params object, only a site_name is needed by oai receptionist. */
46 protected String site_name = null;
47 /** The unique repository identifier */
48 protected String repository_id = null;
49
50 /** a converter class to parse XML and create Docs */
51 protected XMLConverter converter=null;
52
53 /** the configure file of this receptionist passed from the oai servlet. */
54 protected Element oai_config = null;
55
56 /** contained in the OAIConfig.xml deciding whether the resumptionToken should be in use */
57 protected int resume_after = -1 ;
58
59 /** the message router that the Receptionist and Actions will talk to */
60 protected ModuleInterface mr = null;
61
62 // Some of the data/responses will not change while the servlet is running, so
63 // we can cache them
64
65 /** A list of all the collections available to this OAI server */
66 protected NodeList collection_list = null;
67 /** a vector of the names, for convenience */
68 protected Vector<String> collection_name_list = null;
69 /** If this is true, then there are no OAI enabled collections, so can always return noRecordsMatch (after validating the request params) */
70 protected boolean noRecordsMatch = false;
71
72 /** A set of all known 'sets' */
73 protected HashSet<String> set_set = null;
74
75 protected boolean has_super_colls = false;
76 /** a hash of super set-> collection list */
77 protected HashMap<String, Vector<String>> super_coll_map = null;
78 /** The identify response */
79 protected Element identify_response = null;
80 /** The list set response */
81 protected Element listsets_response = null;
82 /** the list metadata formats response */
83 protected Element listmetadataformats_response = null;
84
/**
 * Creates a receptionist that owns its own XML converter. The site name,
 * message router and OAI configuration are supplied afterwards through
 * setSiteName(), setMessageRouter() and configure().
 */
public OAIReceptionist() {
    converter = new XMLConverter();
}
88
89 public void cleanUp() {}
90
91 public void setSiteName(String site_name) {
92 this.site_name = site_name;
93 }
94 /** sets the message router - it should already be created and
95 * configured in the init() of a servlet (OAIServer, for example) before being passed to the receptionist*/
96 public void setMessageRouter(ModuleInterface mr) {
97 this.mr = mr;
98 }
99
100 /** configures the receptionist */
101 public boolean configure(Element config) {
102
103 if (this.mr==null) {
104 logger.error(" message routers must be set before calling oai configure");
105 return false;
106 }
107 if (config == null) {
108 logger.error(" oai configure file is null");
109 return false;
110 }
111 oai_config = config;
112 resume_after = getResumeAfter();
113
114 repository_id = getRepositoryIdentifier();
115 if (!configureSetInfo()) {
116 // there are no sets
117 logger.error("No sets (collections) available for OAI");
118 return false;
119 }
120
121 //clear out expired resumption tokens stored in OAIResumptionToken.xml
122 OAIResumptionToken.init();
123 OAIResumptionToken.clearExpiredTokens();
124
125 return true;
126 }
127
128 // assuming that sets are static. If collections change then the servlet
129 // should be restarted.
130 private boolean configureSetInfo() {
131 // do we have any super colls listed in web/WEB-INF/classes/OAIConfig.xml?
132 // Will be like
133 // <oaiSuperSet>
134 // <SetSpec>xxx</SetSpec>
135 // <setName>xxx</SetName>
136 // <SetDescription>xxx</setDescription>
137 // </oaiSuperSet>
138 // The super set is listed in OAIConfig, and collections themselves state
139 // whether they are part of the super set or not.
140 NodeList super_coll_list = this.oai_config.getElementsByTagName(OAIXML.OAI_SUPER_SET);
141 HashMap<String, Element> super_coll_data = new HashMap<String, Element>();
142 if (super_coll_list.getLength() > 0) {
143 this.has_super_colls = true;
144 for (int i=0; i<super_coll_list.getLength(); i++) {
145 Element super_coll = (Element)super_coll_list.item(i);
146 Element set_spec = (Element)GSXML.getChildByTagName(super_coll, OAIXML.SET_SPEC);
147 if (set_spec != null) {
148 String name = GSXML.getNodeText(set_spec);
149 if (!name.equals("")) {
150 super_coll_data.put(name, super_coll);
151 logger.error("adding in super coll "+name);
152 }
153 }
154 }
155
156 if (super_coll_data.size()==0) {
157 this.has_super_colls = false;
158 }
159 }
160 if (this.has_super_colls == true) {
161 this.super_coll_map = new HashMap<String, Vector<String>>();
162 }
163 this.set_set = new HashSet<String>();
164
165 // next, we get a list of all the OAI enabled collections
166 // We get this by sending a listSets request to the MR
167 Document doc = this.converter.newDOM();
168 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
169
170 Element request = GSXML.createBasicRequest(doc, OAIXML.OAI_SET_LIST, "", null);
171 message.appendChild(request);
172 Node msg_node = mr.process(message);
173
174 if (msg_node == null) {
175 logger.error("returned msg_node from mr is null");
176 return false;
177 }
178 Element resp = (Element)GSXML.getChildByTagName(msg_node, GSXML.RESPONSE_ELEM);
179 Element coll_list = (Element)GSXML.getChildByTagName(resp, GSXML.COLLECTION_ELEM + GSXML.LIST_MODIFIER);
180 if (coll_list == null) {
181 logger.error("coll_list is null");
182 return false;
183 }
184
185 NodeList list = coll_list.getElementsByTagName(GSXML.COLLECTION_ELEM);
186 int length = list.getLength();
187 if (length == 0) {
188 logger.error("length is 0");
189 noRecordsMatch = true;
190 return false;
191 }
192
193 this.collection_list = list;
194 this.collection_name_list = new Vector<String>();
195
196 Document listsets_doc = this.converter.newDOM();
197 Element listsets_element = listsets_doc.createElement(OAIXML.LIST_SETS);
198 this.listsets_response = getMessage(listsets_doc, listsets_element);
199
200 // Now, for each collection, get a list of all its sets
201 // might include subsets (classifiers) or super colls
202 // We'll reuse the first message, changing its type and to atts
203 request.setAttribute(GSXML.TYPE_ATT, "");
204 StringBuffer to = new StringBuffer();
205 for (int i=0; i<collection_list.getLength(); i++) {
206 if (i!=0) {
207 to.append(',');
208 }
209 String coll_id =((Element) collection_list.item(i)).getAttribute(GSXML.NAME_ATT);
210 logger.error("coll_id = "+coll_id);
211 to.append(coll_id+"/"+OAIXML.LIST_SETS);
212 this.collection_name_list.add(coll_id);
213 }
214 logger.error ("to att = "+to.toString());
215 request.setAttribute(GSXML.TO_ATT, to.toString());
216 // send to MR
217 msg_node = mr.process(message);
218 logger.error(this.converter.getPrettyString(msg_node));
219 NodeList response_list = ((Element)msg_node).getElementsByTagName(GSXML.RESPONSE_ELEM);
220 for (int c=0; c<response_list.getLength(); c++) {
221 // for each collection's response
222 Element response = (Element)response_list.item(c);
223 String coll_name = GSPath.getFirstLink(response.getAttribute(GSXML.FROM_ATT));
224 logger.error("coll from response "+coll_name);
225 NodeList set_list = response.getElementsByTagName(OAIXML.SET);
226 for (int j=0; j<set_list.getLength(); j++) {
227 // now check if it a super collection
228 Element set = (Element)set_list.item(j);
229 String set_spec = GSXML.getNodeText((Element)GSXML.getChildByTagName(set, OAIXML.SET_SPEC));
230 logger.error("set spec = "+set_spec);
231 // this may change if we add site name back in
232 // setSpecs will be collname or collname:subset or supercollname
233 if (set_spec.indexOf(":")==-1 && ! set_spec.equals(coll_name)) {
234 // it must be a super coll spec
235 logger.error("found super coll, "+set_spec);
236 // check that it is a valid one from config
237 if (this.has_super_colls == true && super_coll_data.containsKey(set_spec)) {
238 Vector <String> subcolls = this.super_coll_map.get(set_spec);
239 if (subcolls == null) {
240 logger.error("its new!!");
241 // not in there yet
242 subcolls = new Vector<String>();
243 this.set_set.add(set_spec);
244 this.super_coll_map.put(set_spec, subcolls);
245 // the first time a supercoll is mentioned, add into the set list
246 logger.error("finding the set info "+this.converter.getPrettyString(super_coll_data.get(set_spec)));
247 listsets_element.appendChild(GSXML.duplicateWithNewName(listsets_doc, super_coll_data.get(set_spec), OAIXML.SET, true));
248 }
249 // add this collection to the list for the super coll
250 subcolls.add(coll_name);
251 }
252 } else { // its either the coll itself or a subcoll
253 // add in the set
254 listsets_element.appendChild(listsets_doc.importNode(set, true));
255 this.set_set.add(set_spec);
256 }
257 } // for each set in the collection
258 } // for each OAI enabled collection
259 return true;
260 }
261
262 /** process using strings - just calls process using Elements */
263 public String process(String xml_in) {
264
265 Node message_node = this.converter.getDOM(xml_in);
266 Node page = process(message_node);
267 return this.converter.getString(page);
268 }
269
270 //Compose a message/response element used to send back to the OAIServer servlet.
271 //This method is only used within OAIReceptionist
272 private Element getMessage(Document doc, Element e) {
273 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
274 Element response = doc.createElement(GSXML.RESPONSE_ELEM);
275 msg.appendChild(response);
276 response.appendChild(e);
277 return msg;
278 }
279
280 /** process - produce xml data in response to a request
281 * if something goes wrong, it returns null -
282 */
283 public Node process(Node message_node) {
284 logger.error("OAIReceptionist received request");
285
286 Element message = this.converter.nodeToElement(message_node);
287 logger.error(this.converter.getString(message));
288
289 // check that its a correct message tag
290 if (!message.getTagName().equals(GSXML.MESSAGE_ELEM)) {
291 logger.error(" Invalid message. GSDL message should start with <"+GSXML.MESSAGE_ELEM+">, instead it starts with:"+message.getTagName()+".");
292 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
293 }
294
295 // get the request out of the message - assume that there is only one
296 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
297 if (request == null) {
298 logger.error(" message had no request!");
299 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
300 }
301 //At this stage, the value of 'to' attribute of the request must be the 'verb'
302 //The only thing that the oai receptionist can be sure is that these verbs are valid, nothing else.
303 String verb = request.getAttribute(GSXML.TO_ATT);
304 if (verb.equals(OAIXML.IDENTIFY)) {
305 return doIdentify();
306 }
307 if (verb.equals(OAIXML.LIST_METADATA_FORMATS)) {
308 return doListMetadataFormats(request);
309 }
310 if (verb.equals(OAIXML.LIST_SETS)) {
311 // we have composed the list sets response on init
312 // Note this means that list sets never uses resumption tokens
313 return this.listsets_response;
314 }
315 if (verb.equals(OAIXML.GET_RECORD)) {
316 return doGetRecord(request);
317 }
318 if (verb.equals(OAIXML.LIST_IDENTIFIERS)) {
319 return doListIdentifiersOrRecords(request,OAIXML.LIST_IDENTIFIERS , OAIXML.HEADER);
320 }
321 if (verb.equals(OAIXML.LIST_RECORDS)) {
322 return doListIdentifiersOrRecords(request, OAIXML.LIST_RECORDS, OAIXML.RECORD);
323 }
324 // should never get here as verbs were checked in OAIServer
325 return OAIXML.createErrorMessage(OAIXML.BAD_VERB, "Unexpected things happened");
326
327 }
328
329
330 private int getResumeAfter() {
331 Element resume_after = (Element)GSXML.getChildByTagName(oai_config, OAIXML.RESUME_AFTER);
332 if(resume_after != null) return Integer.parseInt(GSXML.getNodeText(resume_after));
333 return -1;
334 }
335 private String getRepositoryIdentifier() {
336 Element ri = (Element)GSXML.getChildByTagName(oai_config, OAIXML.REPOSITORY_IDENTIFIER);
337 if (ri != null) {
338 return GSXML.getNodeText(ri);
339 }
340 return "";
341 }
342
343
344 /** if the param_map contains strings other than those in valid_strs, return false;
345 * otherwise true.
346 */
347 private boolean areAllParamsValid(HashMap<String, String> param_map, HashSet<String> valid_strs) {
348 ArrayList<String> param_list = new ArrayList<String>(param_map.keySet());
349 for(int i=0; i<param_list.size(); i++) {
350 logger.error("param, key = "+param_list.get(i)+", value = "+param_map.get(param_list.get(i)));
351 if (valid_strs.contains(param_list.get(i)) == false) {
352 return false;
353 }
354 }
355 return true;
356 }
357
358 private Element doListIdentifiersOrRecords(Element req, String verb, String record_type) {
359 // options: from, until, set, metadataPrefix, resumptionToken
360 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
361 HashSet<String> valid_strs = new HashSet<String>();
362 valid_strs.add(OAIXML.FROM);
363 valid_strs.add(OAIXML.UNTIL);
364 valid_strs.add(OAIXML.SET);
365 valid_strs.add(OAIXML.METADATA_PREFIX);
366 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
367
368 Document result_doc = this.converter.newDOM();
369 Element result_element = result_doc.createElement(verb);
370 boolean result_token_needed = false; // does this result need to include a
371 // resumption token
372
373 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
374
375 HashMap<String, String> param_map = GSXML.getParamMap(params);
376
377 // are all the params valid?
378 if (!areAllParamsValid(param_map, valid_strs)) {
379 logger.error("One of the params is invalid");
380 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "There was an invalid parameter");
381 // TODO, need to tell the user which one was invalid ??
382 }
383
384 // Do we have a resumption token??
385 String token = null;
386 String from = null;
387 String until = null;
388 boolean set_requested = false;
389 String set_spec_str = null;
390 String prefix_value = null;
391 int cursor = 0;
392 int current_cursor = 0;
393 String current_set = null;
394
395 int total_size = -1; // we are only going to set this in resumption
396 // token if it is easy to work out, i.e. not sending extra requests to
397 // MR just to calculate total size
398
399 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
400 // Is it an error to have other arguments? Do we need to check to make sure that resumptionToken is the only arg??
401 // validate resumptionToken
402 token = param_map.get(OAIXML.RESUMPTION_TOKEN);
403 logger.info("has resumptionToken " + token);
404 if(OAIResumptionToken.isValidToken(token) == false) {
405 logger.error("token is not valid");
406 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "");
407 }
408 result_token_needed = true; // we always need to send a token back if we have started with one. It may be empty if we are returning the end of the list
409 // initialise the request params from the stored token data
410 HashMap<String, String> token_data = OAIResumptionToken.getTokenData(token);
411 from = token_data.get(OAIXML.FROM);
412 until = token_data.get(OAIXML.UNTIL);
413 set_spec_str = token_data.get(OAIXML.SET);
414 if (set_spec_str != null) {
415 set_requested = true;
416 }
417 prefix_value = token_data.get(OAIXML.METADATA_PREFIX);
418 current_set = token_data.get(OAIResumptionToken.CURRENT_SET);
419 try {
420 cursor = Integer.parseInt(token_data.get(OAIXML.CURSOR));
421 cursor = cursor + resume_after; // increment cursor
422 current_cursor = Integer.parseInt(token_data.get(OAIResumptionToken.CURRENT_CURSOR));
423 } catch (NumberFormatException e) {
424 logger.error("tried to parse int from cursor data and failed");
425 }
426
427 }
428 else {
429 // no resumption token, lets check the other params
430 // there must be a metadataPrefix
431 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
432 logger.error("metadataPrefix param required");
433 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "metadataPrefix param required");
434 }
435
436 //if there are any date params, check they're of the right format
437 from = param_map.get(OAIXML.FROM);
438 if(from != null) {
439 Date from_date = OAIXML.getDate(from);
440 if(from_date == null) {
441 logger.error("invalid date: " + from);
442 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.FROM);
443 }
444 }
445 until = param_map.get(OAIXML.UNTIL);
446 if(until != null) {
447 Date until_date = OAIXML.getDate(until);
448 if(until_date == null) {
449 logger.error("invalid date: " + until);
450 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.UNTIL);
451 }
452 }
453 if(from != null && until != null) { // check they are of the same date-time format (granularity)
454 if(from.length() != until.length()) {
455 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
456 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "The request has different granularities (date-time formats) for the From and Until date parameters.");
457 }
458 }
459
460 // check the set arg is a set we know about
461 set_requested = param_map.containsKey(OAIXML.SET);
462 set_spec_str = null;
463 if(set_requested == true) {
464 set_spec_str = param_map.get(OAIXML.SET);
465 if (!this.set_set.contains(set_spec_str)) {
466 // the set is not one we know about
467 logger.error("requested set is not found in this repository");
468 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid set parameter");
469
470 }
471 }
472 // Is the metadataPrefix arg one this repository supports?
473 prefix_value = param_map.get(OAIXML.METADATA_PREFIX);
474 if (repositorySupportsMetadataPrefix(prefix_value) == false) {
475 logger.error("requested metadataPrefix is not found in OAIConfig.xml");
476 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "metadata format "+prefix_value+" not supported by this repository");
477 }
478
479 } // else no resumption token, check other params
480
481 // Whew. Now we have validated the params, we can work on doing the actual
482 // request
483
484
485 Document doc = this.converter.newDOM();
486 Element mr_msg = doc.createElement(GSXML.MESSAGE_ELEM);
487 Element mr_req = doc.createElement(GSXML.REQUEST_ELEM);
488 // TODO does this need a type???
489 mr_msg.appendChild(mr_req);
490
491 // copy in the from/until params if there
492 if (from != null) {
493 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.FROM, from));
494 }
495 if (until != null) {
496 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.UNTIL, until));
497 }
498 // add metadataPrefix
499 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.METADATA_PREFIX, prefix_value));
500
501 // do we have a set???
502 // if no set, we send to all collections in the collection list
503 // if super set, we send to all collections in super set list
504 // if a single collection, send to it
505 // if a subset, send to the collection
506 Vector<String> current_coll_list = null;
507 boolean single_collection = false;
508 if (set_requested == false) {
509 // just do all colls
510 current_coll_list = collection_name_list;
511 }
512 else if (has_super_colls && super_coll_map.containsKey(set_spec_str)) {
513 current_coll_list = super_coll_map.get(set_spec_str);
514 }
515 else {
516 current_coll_list = new Vector<String>();
517 if (set_spec_str.indexOf(":") != -1) {
518 // we have a subset
519 //add the set param back into the request, but send the request to the collection
520 String col_name = set_spec_str.substring(0, set_spec_str.indexOf(":"));
521 current_coll_list.add(col_name);
522 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.SET, set_spec_str));
523 single_collection = true;
524 }
525 else {
526 // it must be a single collection name
527 current_coll_list.add(set_spec_str);
528 single_collection = true;
529 }
530 }
531
532 int num_collected_records = 0;
533 int start_point = current_cursor; // may not be 0 if we are using a resumption token
534 String resumption_collection = "";
535 boolean empty_result_token = false; // if we are sending the last part of a list, then the token value will be empty
536
537 // iterate through the list of collections and send the request to each
538
539 int start_coll=0;
540 if (current_set != null) {
541 // we are resuming a previous request, need to locate the first collection
542 for (int i=0; i<current_coll_list.size(); i++) {
543 if (current_set.equals(current_coll_list.get(i))) {
544 start_coll = i;
545 break;
546 }
547 }
548 }
549
550 for (int i=start_coll; i<current_coll_list.size(); i++) {
551 String current_coll = current_coll_list.get(i);
552 mr_req.setAttribute(GSXML.TO_ATT, current_coll+"/"+verb);
553
554 Element result = (Element)mr.process(mr_msg);
555 logger.error(verb+ " result for coll "+current_coll);
556 logger.error(this.converter.getPrettyString(result));
557 if (result == null) {
558 logger.info("message router returns null");
559 // do what??? carry on? fail??
560 return OAIXML.createErrorMessage("Internal service returns null", "");
561 }
562 Element res = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
563 if(res == null) {
564 logger.info("response element in xml_result is null");
565 return OAIXML.createErrorMessage("Internal service returns null", "");
566 }
567 NodeList record_list = res.getElementsByTagName(record_type);
568 int num_records = record_list.getLength();
569 if(num_records == 0) {
570 logger.info("message router returns 0 records for coll "+current_coll);
571 continue; // try the next collection
572 }
573 if (single_collection) {
574 total_size = num_records;
575 }
576 int records_to_add = (resume_after > 0 ? resume_after - num_collected_records : num_records);
577 if (records_to_add > (num_records-start_point)) {
578 records_to_add = num_records-start_point;
579 }
580 addRecordsToList(result_doc, result_element, record_list, start_point, records_to_add);
581 num_collected_records += records_to_add;
582
583 // do we need to stop here, and do we need to issue a resumption token?
584 if (resume_after > 0 && num_collected_records == resume_after) {
585 // we have finished collecting records at the moment.
586 // but are we conincidentally at the end? or are there more to go?
587 if (records_to_add < (num_records - start_point)) {
588 // we have added less than this collection had
589 start_point += records_to_add;
590 resumption_collection = current_coll;
591 result_token_needed = true;
592 }
593 else {
594 // we added all this collection had to offer
595 // is there another collection in the list??
596 if (i<current_coll_list.size()-1) {
597 result_token_needed = true;
598 start_point = 0;
599 resumption_collection = current_coll_list.get(i+1);
600 }
601 else {
602 // we have finished one collection and there are no more collection
603 // if we need to send a resumption token (in this case, only because we started with one, then it will be empty
604 logger.error("at end of list, need empty result token");
605 empty_result_token = true;
606 }
607 }
608 break;
609 }
610 start_point = 0; // only the first one will have start non-zero, if we
611 // have a resumption token
612
613 } // for each collection
614
615 if (num_collected_records ==0) {
616 // there were no matching results
617 return OAIXML.createErrorMessage(OAIXML.NO_RECORDS_MATCH, "");
618 }
619
620 if (num_collected_records < resume_after) {
621 // we have been through all collections, and there are no more
622 // if we need a result token - only because we started with one, so we need to send an empty one, then make sure everyone knows we are just sending an empty one
623 if (result_token_needed) {
624 empty_result_token = true;
625 }
626 }
627
628 if (result_token_needed) {
629 // we need a resumption token
630 if (empty_result_token) {
631 logger.error("have empty result token");
632 token = "";
633 } else {
634 if (token != null) {
635 // we had a token for this request, we can just update it
636 token = OAIResumptionToken.updateToken(token, ""+cursor, resumption_collection, ""+start_point);
637 } else {
638 // we are generating a new one
639 token = OAIResumptionToken.createAndStoreResumptionToken(set_spec_str, prefix_value, from, until, ""+cursor, resumption_collection, ""+start_point );
640 }
641 }
642
643 // result token XML
644 long expiration_date = -1;
645 if (empty_result_token) {
646 // we know how many records in total as we have sent them all
647 total_size = cursor+num_collected_records;
648 } else {
649 // non-empty token, set the expiration date
650 expiration_date = OAIResumptionToken.getExpirationDate(token);
651 }
652 Element token_elem = OAIXML.createResumptionTokenElement(result_doc, token, total_size, cursor, expiration_date);
653 // OAIXML.addToken(token_elem); // store it
654 result_element.appendChild(token_elem); // add to the result
655 }
656
657
658 return getMessage(result_doc, result_element);
659 }
660
661 private void addRecordsToList(Document doc, Element result_element, NodeList
662 record_list, int start_point, int num_records) {
663 int end_point = start_point + num_records;
664 for (int i=start_point; i<end_point; i++) {
665 result_element.appendChild(doc.importNode(record_list.item(i), true));
666 }
667 }
668
669
670 // method exclusively used by doListRecords/doListIdentifiers
671 private void getRecords(Element verb_elem, NodeList list, int start_point, int end_point) {
672 for (int i=start_point; i<end_point; i++) {
673 verb_elem.appendChild(verb_elem.getOwnerDocument().importNode(list.item(i), true));
674 }
675 }
676 private Element collectAll(Element result, Element msg, String verb, String elem_name) {
677 if(result == null) {
678 //in the first round, result is null
679 return msg;
680 }
681 Element res_in_result = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
682 if(res_in_result == null) { // return the results of all other collections accumulated so far
683 return msg;
684 }
685 Element verb_elem = (Element)GSXML.getChildByTagName(res_in_result, verb);
686 if(msg == null) {
687 return result;
688 }
689
690 //e.g., get all <record> elements from the returned message. There may be none of
691 //such element, for example, the collection service returned an error message
692 NodeList elem_list = msg.getElementsByTagName(elem_name);
693
694 for (int i=0; i<elem_list.getLength(); i++) {
695 verb_elem.appendChild(res_in_result.getOwnerDocument().importNode(elem_list.item(i), true));
696 }
697 return result;
698 }
699
700
701 /** there are three possible exception conditions: bad argument, idDoesNotExist, and noMetadataFormat.
702 * The first one is handled here, and the last two are processed by OAIPMH.
703 */
704 private Element doListMetadataFormats(Element req) {
705 //if the verb is ListMetadataFormats, there could be only one parameter: identifier
706 //, or there is no parameter; otherwise it is an error
707 //logger.info("" + this.converter.getString(msg));
708
709 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
710 Element param = null;
711 Document lmf_doc = this.converter.newDOM();
712 if(params.getLength() == 0) {
713 //this is requesting metadata formats for the whole repository
714 //read the oaiConfig.xml file, return the metadata formats specified there.
715 if (this.listmetadataformats_response != null) {
716 // we have already created it
717 return this.listmetadataformats_response;
718 }
719
720 Element list_metadata_formats = lmf_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
721
722 Element format_list = (Element)GSXML.getChildByTagName(oai_config, OAIXML.LIST_METADATA_FORMATS);
723 if(format_list == null) {
724 logger.error("OAIConfig.xml must contain the supported metadata formats");
725 // TODO this is internal error, what to do???
726 return getMessage(lmf_doc, list_metadata_formats);
727 }
728 NodeList formats = format_list.getElementsByTagName(OAIXML.METADATA_FORMAT);
729 for(int i=0; i<formats.getLength(); i++) {
730 Element meta_fmt = lmf_doc.createElement(OAIXML.METADATA_FORMAT);
731 Element first_meta_format = (Element)formats.item(i);
732 //the element also contains mappings, but we don't want them
733 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_PREFIX), true));
734 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.SCHEMA), true));
735 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_NAMESPACE), true));
736 list_metadata_formats.appendChild(meta_fmt);
737 }
738 return getMessage(lmf_doc, list_metadata_formats);
739
740
741 }
742
743 if (params.getLength() > 1) {
744 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
745 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
746 }
747
748 // This is a request for the metadata of a particular item with an identifier
749 /**the request xml is in the form: <request>
750 * <param name=.../>
751 * </request>
752 *And there is a param element and one element only. (No paramList element in between).
753 */
754 param = (Element)params.item(0);
755 String param_name = param.getAttribute(GSXML.NAME_ATT);
756 String identifier = "";
757 if (!param_name.equals(OAIXML.IDENTIFIER)) {
758 //Bad argument
759 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
760 }
761
762 identifier = param.getAttribute(GSXML.VALUE_ATT);
763 // the identifier is in the form: <coll_name>:<OID>
764 // so it must contain at least two ':' characters
765 String[] strs = identifier.split(":");
766 if(strs == null || strs.length < 2) {
767 // the OID may also contain ':'
768 logger.error("identifier is not in the form coll:id" + identifier);
769 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
770 }
771
772 // send request to message router
773 // get the names
774 strs = splitNames(identifier);
775 if(strs == null || strs.length < 2) {
776 logger.error("identifier is not in the form coll:id" + identifier);
777 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
778 }
779 //String name_of_site = strs[0];
780 String coll_name = strs[0];
781 String oid = strs[1];
782
783 //re-organize the request element
784 // reset the 'to' attribute
785 String verb = req.getAttribute(GSXML.TO_ATT);
786 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
787 // reset the identifier element
788 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
789 param.setAttribute(GSXML.VALUE_ATT, oid);
790
791 // TODO is this the best way to do this???? should we create a new request???
792 Element message = req.getOwnerDocument().createElement(GSXML.MESSAGE_ELEM);
793 message.appendChild(req);
794 //Now send the request to the message router to process
795 Node result_node = mr.process(message);
796 return converter.nodeToElement(result_node);
797 }
798
799
800
801
802 private void copyNamedElementfromConfig(Element to_elem, String element_name) {
803 Element original_element = (Element)GSXML.getChildByTagName(oai_config, element_name);
804 if(original_element != null) {
805 copyNode(to_elem, original_element);
806 }
807 }
808
809 private void copyNode(Element to_elem, Node original_element) {
810 to_elem.appendChild(to_elem.getOwnerDocument().importNode(original_element, true));
811
812 }
813
814 private Element doIdentify() {
815 //The validation for this verb has been done in OAIServer.validate(). So no bother here.
816 logger.info("");
817 if (this.identify_response != null) {
818 // we have already created it
819 return this.identify_response;
820 }
821 Document doc = this.converter.newDOM();
822 Element identify = doc.createElement(OAIXML.IDENTIFY);
823 //do the repository name
824 copyNamedElementfromConfig(identify, OAIXML.REPOSITORY_NAME);
825 //do the baseurl
826 copyNamedElementfromConfig(identify, OAIXML.BASE_URL);
827 //do the protocol version
828 copyNamedElementfromConfig(identify, OAIXML.PROTOCOL_VERSION);
829
830 //There can be more than one admin email according to the OAI specification
831 NodeList admin_emails = GSXML.getChildrenByTagName(oai_config, OAIXML.ADMIN_EMAIL);
832 int num_admin = 0;
833 Element from_admin_email = null;
834 if (admin_emails != null) {
835 num_admin = admin_emails.getLength();
836 }
837 for (int i=0; i<num_admin; i++) {
838 copyNode(identify, admin_emails.item(i));
839 }
840
841 //do the earliestDatestamp
842 //send request to mr to search through the earliest datestamp amongst all oai collections in the repository.
843 //ask the message router for a list of oai collections
844 //NodeList oai_coll = getOAICollectionList();
845 long earliestDatestamp = getEarliestDateStamp(collection_list);
846 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
847 Element earliestDatestamp_elem = doc.createElement(OAIXML.EARLIEST_DATESTAMP);
848 GSXML.setNodeText(earliestDatestamp_elem, earliestDatestamp_str);
849 identify.appendChild(earliestDatestamp_elem);
850
851 //do the deletedRecord
852 copyNamedElementfromConfig(identify, OAIXML.DELETED_RECORD);
853 //do the granularity
854 copyNamedElementfromConfig(identify, OAIXML.GRANULARITY);
855
856 // output the oai identifier
857 Element description = doc.createElement(OAIXML.DESCRIPTION);
858 identify.appendChild(description);
859 // TODO, make this a valid id
860 Element oaiIdentifier = OAIXML.createOAIIdentifierXML(doc, repository_id, "lucene-jdbm-demo", "ec159e");
861 description.appendChild(oaiIdentifier);
862
863 // if there are any oaiInfo metadata, add them in too.
864 Element info = (Element)GSXML.getChildByTagName(oai_config, OAIXML.OAI_INFO);
865 if (info != null) {
866 NodeList meta = GSXML.getChildrenByTagName(info, OAIXML.METADATA);
867 if (meta != null && meta.getLength() > 0) {
868 Element gsdl = OAIXML.createGSDLElement(doc);
869 description.appendChild(gsdl);
870 for (int m = 0; m<meta.getLength(); m++) {
871 copyNode(gsdl, meta.item(m));
872 }
873
874 }
875 }
876 this.identify_response = identify;
877 return getMessage(doc, identify);
878 }
879 //split setSpec (site_name:coll_name) into an array of strings
880 //It has already been checked that the set_spec contains at least one ':'
881 private String[] splitSetSpec(String set_spec) {
882 logger.info(set_spec);
883 String[] strs = new String[2];
884 int colon_index = set_spec.indexOf(":");
885 strs[0] = set_spec.substring(0, colon_index);
886 strs[1] = set_spec.substring(colon_index + 1);
887 return strs;
888 }
889 /** split the identifier into <collection + OID> as an array
890 It has already been checked that the 'identifier' contains at least one ':'
891 */
892 private String[] splitNames(String identifier) {
893 logger.info(identifier);
894 String [] strs = new String[2];
895 int first_colon = identifier.indexOf(":");
896 if(first_colon == -1) {
897 return null;
898 }
899 strs[0] = identifier.substring(0, first_colon);
900 strs[1] = identifier.substring(first_colon + 1);
901 return strs;
902 }
903 /** validate if the specified metadata prefix value is supported by the repository
904 * by checking it in the OAIConfig.xml
905 */
906 private boolean repositorySupportsMetadataPrefix(String prefix_value) {
907 NodeList prefix_list = oai_config.getElementsByTagName(OAIXML.METADATA_PREFIX);
908
909 for(int i=0; i<prefix_list.getLength(); i++) {
910 if(prefix_value.equals(GSXML.getNodeText((Element)prefix_list.item(i)).trim() )) {
911 return true;
912 }
913 }
914 return false;
915 }
916 private Element doGetRecord(Element req){
917 logger.info("");
918 /** arguments:
919 identifier: required
920 metadataPrefix: required
921 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
922 */
923 Document doc = this.converter.newDOM();
924 Element get_record = doc.createElement(OAIXML.GET_RECORD);
925
926 HashSet<String> valid_strs = new HashSet<String>();
927 valid_strs.add(OAIXML.IDENTIFIER);
928 valid_strs.add(OAIXML.METADATA_PREFIX);
929
930 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
931 HashMap<String, String> param_map = GSXML.getParamMap(params);
932
933 if(!areAllParamsValid(param_map, valid_strs) ||
934 params.getLength() == 0 ||
935 param_map.containsKey(OAIXML.IDENTIFIER) == false ||
936 param_map.containsKey(OAIXML.METADATA_PREFIX) == false ) {
937 logger.error("must have the metadataPrefix/identifier parameter.");
938 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
939 }
940
941 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
942 String identifier = param_map.get(OAIXML.IDENTIFIER);
943
944 // verify the metadata prefix
945 if (repositorySupportsMetadataPrefix(prefix) == false) {
946 logger.error("requested prefix is not found in OAIConfig.xml");
947 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
948 }
949
950 // get the names
951 String[] strs = splitNames(identifier);
952 if(strs == null || strs.length < 2) {
953 logger.error("identifier is not in the form coll:id" + identifier);
954 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
955 }
956 //String name_of_site = strs[0];
957 String coll_name = strs[0];
958 String oid = strs[1];
959
960 //re-organize the request element
961 // reset the 'to' attribute
962 String verb = req.getAttribute(GSXML.TO_ATT);
963 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
964 // reset the identifier element
965 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.IDENTIFIER);
966 if (param != null) {
967 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
968 param.setAttribute(GSXML.VALUE_ATT, oid);
969 }
970
971 //Now send the request to the message router to process
972 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
973 msg.appendChild(doc.importNode(req, true));
974 Node result_node = mr.process(msg);
975 return converter.nodeToElement(result_node);
976 }
977
978 // See OAIConfig.xml
979 // dynamically works out what the earliestDateStamp is, since it varies by collection
980 // returns this time in *milliseconds*.
981 protected long getEarliestDateStamp(NodeList oai_coll) {
982 //do the earliestDatestamp
983 long earliestDatestamp = System.currentTimeMillis();
984 int oai_coll_size = oai_coll.getLength();
985 if (oai_coll_size == 0) {
986 logger.info("returned oai collection list is empty. Setting repository earliestDatestamp to be 1970-01-01.");
987 earliestDatestamp = 0;
988 }
989 // the earliestDatestamp is now stored as a metadata element in the collection's buildConfig.xml file
990 // we get the earliestDatestamp among the collections
991 for(int i=0; i<oai_coll_size; i++) {
992 long coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.EARLIEST_DATESTAMP));
993 earliestDatestamp = (earliestDatestamp > coll_earliestDatestamp)? coll_earliestDatestamp : earliestDatestamp;
994 }
995
996 return earliestDatestamp*1000; // converting from seconds to milliseconds
997 }
998}
999
1000
Note: See TracBrowser for help on using the repository browser.