source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/core/OAIReceptionist.java@ 28873

Last change on this file since 28873 was 28873, checked in by kjdon, 10 years ago

added destroy method to OAIServer, which calls cleanUp on the OAIReceptionist, which then calls cleanUp on MR, and OAIResumptionToken.saveTokensToFile, so we can store tokens through a server shut down

File size: 39.5 KB
Line 
1/*
2 * OAIReceptionist.java
3 * Copyright (C) 2012 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20package org.greenstone.gsdl3.core;
21
22import org.greenstone.gsdl3.util.*;
23import org.greenstone.gsdl3.action.*;
24// XML classes
25import org.w3c.dom.Node;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29
30// other java classes
31import java.io.File;
32import java.util.*;
33
34import org.apache.log4j.*;
35
36/** a Receptionist, used for oai metadata response xml generation.
37 * This receptionist talks to the message router directly,
38 * instead of via any action, hence no action map is needed.
39 * @see the basic Receptionist
40 */
41public class OAIReceptionist implements ModuleInterface {
42
/** Class-wide log4j logger. */
static Logger logger = Logger.getLogger(org.greenstone.gsdl3.core.OAIReceptionist.class.getName());

/** Name of the site served; the OAI receptionist needs only this rather than a full config_params object. */
protected String site_name = null;
/** The unique repository identifier, read from the OAI config (empty string when not configured). */
protected String repository_id = null;

/** Converter used to parse XML strings and create new DOM documents. */
protected XMLConverter converter=null;

/** The OAI configuration element handed to configure() by the OAI servlet. */
protected Element oai_config = null;

/**
 * Maximum number of records returned in one list response before a
 * resumption token is issued. Read from the OAI config; -1 when the
 * setting is absent, and list handling only applies it when it is > 0.
 */
protected int resume_after = -1 ;

/** The message router that this receptionist sends its requests to. */
protected ModuleInterface mr = null;

// Some of the data/responses will not change while the servlet is running, so
// we can cache them

/** The collection elements of all OAI enabled collections, as returned by the message router. */
protected NodeList collection_list = null;
/** The names of the collections in collection_list, kept separately for convenience. */
protected Vector<String> collection_name_list = null;
/** Set to true when the message router reports no OAI enabled collections, so noRecordsMatch can always be returned (after validating the request params). */
protected boolean noRecordsMatch = false;

/** The set specs of all known sets (collections, sub-sets and super collections). */
protected HashSet<String> set_set = null;

/** True when the OAI config declares at least one super collection with a non-empty set spec. */
protected boolean has_super_colls = false;
/** Maps a super collection's set spec to the names of the collections that belong to it. */
protected HashMap<String, Vector<String>> super_coll_map = null;
/** The cached Identify response. */
protected Element identify_response = null;
/** The cached ListSets response, composed while the set info is configured. */
protected Element listsets_response = null;
/** The cached repository-wide ListMetadataFormats response. */
protected Element listmetadataformats_response = null;
84
/**
 * Creates a receptionist that owns a fresh XML converter. The message
 * router and the configuration are supplied afterwards through
 * setMessageRouter() and configure().
 */
public OAIReceptionist() {
    converter = new XMLConverter();
}
88
89 public void cleanUp() {
90 if (this.mr != null) {
91
92 this.mr.cleanUp();
93 }
94 OAIResumptionToken.saveTokensToFile();
95 }
96
/**
 * Records the name of the site this receptionist serves.
 *
 * @param site_name the site name to store
 */
public void setSiteName(String site_name) {
    this.site_name = site_name;
}
/**
 * Sets the message router. It should already be created and configured
 * in the init() of a servlet (OAIServer, for example) before being passed
 * to the receptionist; configure() refuses to run until a router is set.
 *
 * @param mr the message router that requests will be sent to
 */
public void setMessageRouter(ModuleInterface mr) {
    this.mr = mr;
}
105
106 /** configures the receptionist */
107 public boolean configure(Element config) {
108
109 if (this.mr==null) {
110 logger.error(" message routers must be set before calling oai configure");
111 return false;
112 }
113 if (config == null) {
114 logger.error(" oai configure file is null");
115 return false;
116 }
117 oai_config = config;
118 resume_after = getResumeAfter();
119
120 repository_id = getRepositoryIdentifier();
121 if (!configureSetInfo()) {
122 // there are no sets
123 logger.error("No sets (collections) available for OAI");
124 return false;
125 }
126
127 //clear out expired resumption tokens stored in OAIResumptionToken.xml
128 OAIResumptionToken.init();
129 OAIResumptionToken.clearExpiredTokens();
130
131 return true;
132 }
133
134 // assuming that sets are static. If collections change then the servlet
135 // should be restarted.
136 private boolean configureSetInfo() {
137 // do we have any super colls listed in web/WEB-INF/classes/OAIConfig.xml?
138 // Will be like
139 // <oaiSuperSet>
140 // <SetSpec>xxx</SetSpec>
141 // <setName>xxx</SetName>
142 // <SetDescription>xxx</setDescription>
143 // </oaiSuperSet>
144 // The super set is listed in OAIConfig, and collections themselves state
145 // whether they are part of the super set or not.
146 NodeList super_coll_list = this.oai_config.getElementsByTagName(OAIXML.OAI_SUPER_SET);
147 HashMap<String, Element> super_coll_data = new HashMap<String, Element>();
148 if (super_coll_list.getLength() > 0) {
149 this.has_super_colls = true;
150 for (int i=0; i<super_coll_list.getLength(); i++) {
151 Element super_coll = (Element)super_coll_list.item(i);
152 Element set_spec = (Element)GSXML.getChildByTagName(super_coll, OAIXML.SET_SPEC);
153 if (set_spec != null) {
154 String name = GSXML.getNodeText(set_spec);
155 if (!name.equals("")) {
156 super_coll_data.put(name, super_coll);
157 logger.error("adding in super coll "+name);
158 }
159 }
160 }
161
162 if (super_coll_data.size()==0) {
163 this.has_super_colls = false;
164 }
165 }
166 if (this.has_super_colls == true) {
167 this.super_coll_map = new HashMap<String, Vector<String>>();
168 }
169 this.set_set = new HashSet<String>();
170
171 // next, we get a list of all the OAI enabled collections
172 // We get this by sending a listSets request to the MR
173 Document doc = this.converter.newDOM();
174 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
175
176 Element request = GSXML.createBasicRequest(doc, OAIXML.OAI_SET_LIST, "", null);
177 message.appendChild(request);
178 Node msg_node = mr.process(message);
179
180 if (msg_node == null) {
181 logger.error("returned msg_node from mr is null");
182 return false;
183 }
184 Element resp = (Element)GSXML.getChildByTagName(msg_node, GSXML.RESPONSE_ELEM);
185 Element coll_list = (Element)GSXML.getChildByTagName(resp, GSXML.COLLECTION_ELEM + GSXML.LIST_MODIFIER);
186 if (coll_list == null) {
187 logger.error("coll_list is null");
188 return false;
189 }
190
191 NodeList list = coll_list.getElementsByTagName(GSXML.COLLECTION_ELEM);
192 int length = list.getLength();
193 if (length == 0) {
194 logger.error("length is 0");
195 noRecordsMatch = true;
196 return false;
197 }
198
199 this.collection_list = list;
200 this.collection_name_list = new Vector<String>();
201
202 Document listsets_doc = this.converter.newDOM();
203 Element listsets_element = listsets_doc.createElement(OAIXML.LIST_SETS);
204 this.listsets_response = getMessage(listsets_doc, listsets_element);
205
206 // Now, for each collection, get a list of all its sets
207 // might include subsets (classifiers) or super colls
208 // We'll reuse the first message, changing its type and to atts
209 request.setAttribute(GSXML.TYPE_ATT, "");
210 StringBuffer to = new StringBuffer();
211 for (int i=0; i<collection_list.getLength(); i++) {
212 if (i!=0) {
213 to.append(',');
214 }
215 String coll_id =((Element) collection_list.item(i)).getAttribute(GSXML.NAME_ATT);
216 logger.error("coll_id = "+coll_id);
217 to.append(coll_id+"/"+OAIXML.LIST_SETS);
218 this.collection_name_list.add(coll_id);
219 }
220 logger.error ("to att = "+to.toString());
221 request.setAttribute(GSXML.TO_ATT, to.toString());
222 // send to MR
223 msg_node = mr.process(message);
224 logger.error(this.converter.getPrettyString(msg_node));
225 NodeList response_list = ((Element)msg_node).getElementsByTagName(GSXML.RESPONSE_ELEM);
226 for (int c=0; c<response_list.getLength(); c++) {
227 // for each collection's response
228 Element response = (Element)response_list.item(c);
229 String coll_name = GSPath.getFirstLink(response.getAttribute(GSXML.FROM_ATT));
230 logger.error("coll from response "+coll_name);
231 NodeList set_list = response.getElementsByTagName(OAIXML.SET);
232 for (int j=0; j<set_list.getLength(); j++) {
233 // now check if it a super collection
234 Element set = (Element)set_list.item(j);
235 String set_spec = GSXML.getNodeText((Element)GSXML.getChildByTagName(set, OAIXML.SET_SPEC));
236 logger.error("set spec = "+set_spec);
237 // this may change if we add site name back in
238 // setSpecs will be collname or collname:subset or supercollname
239 if (set_spec.indexOf(":")==-1 && ! set_spec.equals(coll_name)) {
240 // it must be a super coll spec
241 logger.error("found super coll, "+set_spec);
242 // check that it is a valid one from config
243 if (this.has_super_colls == true && super_coll_data.containsKey(set_spec)) {
244 Vector <String> subcolls = this.super_coll_map.get(set_spec);
245 if (subcolls == null) {
246 logger.error("its new!!");
247 // not in there yet
248 subcolls = new Vector<String>();
249 this.set_set.add(set_spec);
250 this.super_coll_map.put(set_spec, subcolls);
251 // the first time a supercoll is mentioned, add into the set list
252 logger.error("finding the set info "+this.converter.getPrettyString(super_coll_data.get(set_spec)));
253 listsets_element.appendChild(GSXML.duplicateWithNewName(listsets_doc, super_coll_data.get(set_spec), OAIXML.SET, true));
254 }
255 // add this collection to the list for the super coll
256 subcolls.add(coll_name);
257 }
258 } else { // its either the coll itself or a subcoll
259 // add in the set
260 listsets_element.appendChild(listsets_doc.importNode(set, true));
261 this.set_set.add(set_spec);
262 }
263 } // for each set in the collection
264 } // for each OAI enabled collection
265 return true;
266 }
267
268 /** process using strings - just calls process using Elements */
269 public String process(String xml_in) {
270
271 Node message_node = this.converter.getDOM(xml_in);
272 Node page = process(message_node);
273 return this.converter.getString(page);
274 }
275
276 //Compose a message/response element used to send back to the OAIServer servlet.
277 //This method is only used within OAIReceptionist
278 private Element getMessage(Document doc, Element e) {
279 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
280 Element response = doc.createElement(GSXML.RESPONSE_ELEM);
281 msg.appendChild(response);
282 response.appendChild(e);
283 return msg;
284 }
285
286 /** process - produce xml data in response to a request
287 * if something goes wrong, it returns null -
288 */
289 public Node process(Node message_node) {
290 logger.error("OAIReceptionist received request");
291
292 Element message = this.converter.nodeToElement(message_node);
293 logger.error(this.converter.getString(message));
294
295 // check that its a correct message tag
296 if (!message.getTagName().equals(GSXML.MESSAGE_ELEM)) {
297 logger.error(" Invalid message. GSDL message should start with <"+GSXML.MESSAGE_ELEM+">, instead it starts with:"+message.getTagName()+".");
298 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
299 }
300
301 // get the request out of the message - assume that there is only one
302 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
303 if (request == null) {
304 logger.error(" message had no request!");
305 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
306 }
307 //At this stage, the value of 'to' attribute of the request must be the 'verb'
308 //The only thing that the oai receptionist can be sure is that these verbs are valid, nothing else.
309 String verb = request.getAttribute(GSXML.TO_ATT);
310 if (verb.equals(OAIXML.IDENTIFY)) {
311 return doIdentify();
312 }
313 if (verb.equals(OAIXML.LIST_METADATA_FORMATS)) {
314 return doListMetadataFormats(request);
315 }
316 if (verb.equals(OAIXML.LIST_SETS)) {
317 // we have composed the list sets response on init
318 // Note this means that list sets never uses resumption tokens
319 return this.listsets_response;
320 }
321 if (verb.equals(OAIXML.GET_RECORD)) {
322 return doGetRecord(request);
323 }
324 if (verb.equals(OAIXML.LIST_IDENTIFIERS)) {
325 return doListIdentifiersOrRecords(request,OAIXML.LIST_IDENTIFIERS , OAIXML.HEADER);
326 }
327 if (verb.equals(OAIXML.LIST_RECORDS)) {
328 return doListIdentifiersOrRecords(request, OAIXML.LIST_RECORDS, OAIXML.RECORD);
329 }
330 // should never get here as verbs were checked in OAIServer
331 return OAIXML.createErrorMessage(OAIXML.BAD_VERB, "Unexpected things happened");
332
333 }
334
335
336 private int getResumeAfter() {
337 Element resume_after = (Element)GSXML.getChildByTagName(oai_config, OAIXML.RESUME_AFTER);
338 if(resume_after != null) return Integer.parseInt(GSXML.getNodeText(resume_after));
339 return -1;
340 }
341 private String getRepositoryIdentifier() {
342 Element ri = (Element)GSXML.getChildByTagName(oai_config, OAIXML.REPOSITORY_IDENTIFIER);
343 if (ri != null) {
344 return GSXML.getNodeText(ri);
345 }
346 return "";
347 }
348
349
350 /** if the param_map contains strings other than those in valid_strs, return false;
351 * otherwise true.
352 */
353 private boolean areAllParamsValid(HashMap<String, String> param_map, HashSet<String> valid_strs) {
354 ArrayList<String> param_list = new ArrayList<String>(param_map.keySet());
355 for(int i=0; i<param_list.size(); i++) {
356 logger.error("param, key = "+param_list.get(i)+", value = "+param_map.get(param_list.get(i)));
357 if (valid_strs.contains(param_list.get(i)) == false) {
358 return false;
359 }
360 }
361 return true;
362 }
363
364 private Element doListIdentifiersOrRecords(Element req, String verb, String record_type) {
365 // options: from, until, set, metadataPrefix, resumptionToken
366 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
367 HashSet<String> valid_strs = new HashSet<String>();
368 valid_strs.add(OAIXML.FROM);
369 valid_strs.add(OAIXML.UNTIL);
370 valid_strs.add(OAIXML.SET);
371 valid_strs.add(OAIXML.METADATA_PREFIX);
372 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
373
374 Document result_doc = this.converter.newDOM();
375 Element result_element = result_doc.createElement(verb);
376 boolean result_token_needed = false; // does this result need to include a
377 // resumption token
378
379 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
380
381 HashMap<String, String> param_map = GSXML.getParamMap(params);
382
383 // are all the params valid?
384 if (!areAllParamsValid(param_map, valid_strs)) {
385 logger.error("One of the params is invalid");
386 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "There was an invalid parameter");
387 // TODO, need to tell the user which one was invalid ??
388 }
389
390 // Do we have a resumption token??
391 String token = null;
392 String from = null;
393 String until = null;
394 boolean set_requested = false;
395 String set_spec_str = null;
396 String prefix_value = null;
397 int cursor = 0;
398 int current_cursor = 0;
399 String current_set = null;
400
401 int total_size = -1; // we are only going to set this in resumption
402 // token if it is easy to work out, i.e. not sending extra requests to
403 // MR just to calculate total size
404
405 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
406 // Is it an error to have other arguments? Do we need to check to make sure that resumptionToken is the only arg??
407 // validate resumptionToken
408 token = param_map.get(OAIXML.RESUMPTION_TOKEN);
409 logger.info("has resumptionToken " + token);
410 if(OAIResumptionToken.isValidToken(token) == false) {
411 logger.error("token is not valid");
412 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "");
413 }
414 result_token_needed = true; // we always need to send a token back if we have started with one. It may be empty if we are returning the end of the list
415 // initialise the request params from the stored token data
416 HashMap<String, String> token_data = OAIResumptionToken.getTokenData(token);
417 from = token_data.get(OAIXML.FROM);
418 until = token_data.get(OAIXML.UNTIL);
419 set_spec_str = token_data.get(OAIXML.SET);
420 if (set_spec_str != null) {
421 set_requested = true;
422 }
423 prefix_value = token_data.get(OAIXML.METADATA_PREFIX);
424 current_set = token_data.get(OAIResumptionToken.CURRENT_SET);
425 try {
426 cursor = Integer.parseInt(token_data.get(OAIXML.CURSOR));
427 cursor = cursor + resume_after; // increment cursor
428 current_cursor = Integer.parseInt(token_data.get(OAIResumptionToken.CURRENT_CURSOR));
429 } catch (NumberFormatException e) {
430 logger.error("tried to parse int from cursor data and failed");
431 }
432
433 }
434 else {
435 // no resumption token, lets check the other params
436 // there must be a metadataPrefix
437 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
438 logger.error("metadataPrefix param required");
439 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "metadataPrefix param required");
440 }
441
442 //if there are any date params, check they're of the right format
443 from = param_map.get(OAIXML.FROM);
444 if(from != null) {
445 Date from_date = OAIXML.getDate(from);
446 if(from_date == null) {
447 logger.error("invalid date: " + from);
448 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.FROM);
449 }
450 }
451 until = param_map.get(OAIXML.UNTIL);
452 if(until != null) {
453 Date until_date = OAIXML.getDate(until);
454 if(until_date == null) {
455 logger.error("invalid date: " + until);
456 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.UNTIL);
457 }
458 }
459 if(from != null && until != null) { // check they are of the same date-time format (granularity)
460 if(from.length() != until.length()) {
461 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
462 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "The request has different granularities (date-time formats) for the From and Until date parameters.");
463 }
464 }
465
466 // check the set arg is a set we know about
467 set_requested = param_map.containsKey(OAIXML.SET);
468 set_spec_str = null;
469 if(set_requested == true) {
470 set_spec_str = param_map.get(OAIXML.SET);
471 if (!this.set_set.contains(set_spec_str)) {
472 // the set is not one we know about
473 logger.error("requested set is not found in this repository");
474 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid set parameter");
475
476 }
477 }
478 // Is the metadataPrefix arg one this repository supports?
479 prefix_value = param_map.get(OAIXML.METADATA_PREFIX);
480 if (repositorySupportsMetadataPrefix(prefix_value) == false) {
481 logger.error("requested metadataPrefix is not found in OAIConfig.xml");
482 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "metadata format "+prefix_value+" not supported by this repository");
483 }
484
485 } // else no resumption token, check other params
486
487 // Whew. Now we have validated the params, we can work on doing the actual
488 // request
489
490
491 Document doc = this.converter.newDOM();
492 Element mr_msg = doc.createElement(GSXML.MESSAGE_ELEM);
493 Element mr_req = doc.createElement(GSXML.REQUEST_ELEM);
494 // TODO does this need a type???
495 mr_msg.appendChild(mr_req);
496
497 // copy in the from/until params if there
498 if (from != null) {
499 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.FROM, from));
500 }
501 if (until != null) {
502 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.UNTIL, until));
503 }
504 // add metadataPrefix
505 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.METADATA_PREFIX, prefix_value));
506
507 // do we have a set???
508 // if no set, we send to all collections in the collection list
509 // if super set, we send to all collections in super set list
510 // if a single collection, send to it
511 // if a subset, send to the collection
512 Vector<String> current_coll_list = null;
513 boolean single_collection = false;
514 if (set_requested == false) {
515 // just do all colls
516 current_coll_list = collection_name_list;
517 }
518 else if (has_super_colls && super_coll_map.containsKey(set_spec_str)) {
519 current_coll_list = super_coll_map.get(set_spec_str);
520 }
521 else {
522 current_coll_list = new Vector<String>();
523 if (set_spec_str.indexOf(":") != -1) {
524 // we have a subset
525 //add the set param back into the request, but send the request to the collection
526 String col_name = set_spec_str.substring(0, set_spec_str.indexOf(":"));
527 current_coll_list.add(col_name);
528 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.SET, set_spec_str));
529 single_collection = true;
530 }
531 else {
532 // it must be a single collection name
533 current_coll_list.add(set_spec_str);
534 single_collection = true;
535 }
536 }
537
538 int num_collected_records = 0;
539 int start_point = current_cursor; // may not be 0 if we are using a resumption token
540 String resumption_collection = "";
541 boolean empty_result_token = false; // if we are sending the last part of a list, then the token value will be empty
542
543 // iterate through the list of collections and send the request to each
544
545 int start_coll=0;
546 if (current_set != null) {
547 // we are resuming a previous request, need to locate the first collection
548 for (int i=0; i<current_coll_list.size(); i++) {
549 if (current_set.equals(current_coll_list.get(i))) {
550 start_coll = i;
551 break;
552 }
553 }
554 }
555
556 for (int i=start_coll; i<current_coll_list.size(); i++) {
557 String current_coll = current_coll_list.get(i);
558 mr_req.setAttribute(GSXML.TO_ATT, current_coll+"/"+verb);
559
560 Element result = (Element)mr.process(mr_msg);
561 logger.error(verb+ " result for coll "+current_coll);
562 logger.error(this.converter.getPrettyString(result));
563 if (result == null) {
564 logger.info("message router returns null");
565 // do what??? carry on? fail??
566 return OAIXML.createErrorMessage("Internal service returns null", "");
567 }
568 Element res = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
569 if(res == null) {
570 logger.info("response element in xml_result is null");
571 return OAIXML.createErrorMessage("Internal service returns null", "");
572 }
573 NodeList record_list = res.getElementsByTagName(record_type);
574 int num_records = record_list.getLength();
575 if(num_records == 0) {
576 logger.info("message router returns 0 records for coll "+current_coll);
577 continue; // try the next collection
578 }
579 if (single_collection) {
580 total_size = num_records;
581 }
582 int records_to_add = (resume_after > 0 ? resume_after - num_collected_records : num_records);
583 if (records_to_add > (num_records-start_point)) {
584 records_to_add = num_records-start_point;
585 }
586 addRecordsToList(result_doc, result_element, record_list, start_point, records_to_add);
587 num_collected_records += records_to_add;
588
589 // do we need to stop here, and do we need to issue a resumption token?
590 if (resume_after > 0 && num_collected_records == resume_after) {
591 // we have finished collecting records at the moment.
592 // but are we conincidentally at the end? or are there more to go?
593 if (records_to_add < (num_records - start_point)) {
594 // we have added less than this collection had
595 start_point += records_to_add;
596 resumption_collection = current_coll;
597 result_token_needed = true;
598 }
599 else {
600 // we added all this collection had to offer
601 // is there another collection in the list??
602 if (i<current_coll_list.size()-1) {
603 result_token_needed = true;
604 start_point = 0;
605 resumption_collection = current_coll_list.get(i+1);
606 }
607 else {
608 // we have finished one collection and there are no more collection
609 // if we need to send a resumption token (in this case, only because we started with one, then it will be empty
610 logger.error("at end of list, need empty result token");
611 empty_result_token = true;
612 }
613 }
614 break;
615 }
616 start_point = 0; // only the first one will have start non-zero, if we
617 // have a resumption token
618
619 } // for each collection
620
621 if (num_collected_records ==0) {
622 // there were no matching results
623 return OAIXML.createErrorMessage(OAIXML.NO_RECORDS_MATCH, "");
624 }
625
626 if (num_collected_records < resume_after) {
627 // we have been through all collections, and there are no more
628 // if we need a result token - only because we started with one, so we need to send an empty one, then make sure everyone knows we are just sending an empty one
629 if (result_token_needed) {
630 empty_result_token = true;
631 }
632 }
633
634 if (result_token_needed) {
635 // we need a resumption token
636 if (empty_result_token) {
637 logger.error("have empty result token");
638 token = "";
639 } else {
640 if (token != null) {
641 // we had a token for this request, we can just update it
642 token = OAIResumptionToken.updateToken(token, ""+cursor, resumption_collection, ""+start_point);
643 } else {
644 // we are generating a new one
645 token = OAIResumptionToken.createAndStoreResumptionToken(set_spec_str, prefix_value, from, until, ""+cursor, resumption_collection, ""+start_point );
646 }
647 }
648
649 // result token XML
650 long expiration_date = -1;
651 if (empty_result_token) {
652 // we know how many records in total as we have sent them all
653 total_size = cursor+num_collected_records;
654 } else {
655 // non-empty token, set the expiration date
656 expiration_date = OAIResumptionToken.getExpirationDate(token);
657 }
658 Element token_elem = OAIXML.createResumptionTokenElement(result_doc, token, total_size, cursor, expiration_date);
659 // OAIXML.addToken(token_elem); // store it
660 result_element.appendChild(token_elem); // add to the result
661 }
662
663
664 return getMessage(result_doc, result_element);
665 }
666
667 private void addRecordsToList(Document doc, Element result_element, NodeList
668 record_list, int start_point, int num_records) {
669 int end_point = start_point + num_records;
670 for (int i=start_point; i<end_point; i++) {
671 result_element.appendChild(doc.importNode(record_list.item(i), true));
672 }
673 }
674
675
676 // method exclusively used by doListRecords/doListIdentifiers
677 private void getRecords(Element verb_elem, NodeList list, int start_point, int end_point) {
678 for (int i=start_point; i<end_point; i++) {
679 verb_elem.appendChild(verb_elem.getOwnerDocument().importNode(list.item(i), true));
680 }
681 }
682 private Element collectAll(Element result, Element msg, String verb, String elem_name) {
683 if(result == null) {
684 //in the first round, result is null
685 return msg;
686 }
687 Element res_in_result = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
688 if(res_in_result == null) { // return the results of all other collections accumulated so far
689 return msg;
690 }
691 Element verb_elem = (Element)GSXML.getChildByTagName(res_in_result, verb);
692 if(msg == null) {
693 return result;
694 }
695
696 //e.g., get all <record> elements from the returned message. There may be none of
697 //such element, for example, the collection service returned an error message
698 NodeList elem_list = msg.getElementsByTagName(elem_name);
699
700 for (int i=0; i<elem_list.getLength(); i++) {
701 verb_elem.appendChild(res_in_result.getOwnerDocument().importNode(elem_list.item(i), true));
702 }
703 return result;
704 }
705
706
707 /** there are three possible exception conditions: bad argument, idDoesNotExist, and noMetadataFormat.
708 * The first one is handled here, and the last two are processed by OAIPMH.
709 */
710 private Element doListMetadataFormats(Element req) {
711 //if the verb is ListMetadataFormats, there could be only one parameter: identifier
712 //, or there is no parameter; otherwise it is an error
713 //logger.info("" + this.converter.getString(msg));
714
715 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
716 Element param = null;
717 Document lmf_doc = this.converter.newDOM();
718 if(params.getLength() == 0) {
719 //this is requesting metadata formats for the whole repository
720 //read the oaiConfig.xml file, return the metadata formats specified there.
721 if (this.listmetadataformats_response != null) {
722 // we have already created it
723 return this.listmetadataformats_response;
724 }
725
726 Element list_metadata_formats = lmf_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
727
728 Element format_list = (Element)GSXML.getChildByTagName(oai_config, OAIXML.LIST_METADATA_FORMATS);
729 if(format_list == null) {
730 logger.error("OAIConfig.xml must contain the supported metadata formats");
731 // TODO this is internal error, what to do???
732 return getMessage(lmf_doc, list_metadata_formats);
733 }
734 NodeList formats = format_list.getElementsByTagName(OAIXML.METADATA_FORMAT);
735 for(int i=0; i<formats.getLength(); i++) {
736 Element meta_fmt = lmf_doc.createElement(OAIXML.METADATA_FORMAT);
737 Element first_meta_format = (Element)formats.item(i);
738 //the element also contains mappings, but we don't want them
739 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_PREFIX), true));
740 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.SCHEMA), true));
741 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_NAMESPACE), true));
742 list_metadata_formats.appendChild(meta_fmt);
743 }
744 return getMessage(lmf_doc, list_metadata_formats);
745
746
747 }
748
749 if (params.getLength() > 1) {
750 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
751 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
752 }
753
754 // This is a request for the metadata of a particular item with an identifier
755 /**the request xml is in the form: <request>
756 * <param name=.../>
757 * </request>
758 *And there is a param element and one element only. (No paramList element in between).
759 */
760 param = (Element)params.item(0);
761 String param_name = param.getAttribute(GSXML.NAME_ATT);
762 String identifier = "";
763 if (!param_name.equals(OAIXML.IDENTIFIER)) {
764 //Bad argument
765 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
766 }
767
768 identifier = param.getAttribute(GSXML.VALUE_ATT);
769 // the identifier is in the form: <coll_name>:<OID>
770 // so it must contain at least two ':' characters
771 String[] strs = identifier.split(":");
772 if(strs == null || strs.length < 2) {
773 // the OID may also contain ':'
774 logger.error("identifier is not in the form coll:id" + identifier);
775 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
776 }
777
778 // send request to message router
779 // get the names
780 strs = splitNames(identifier);
781 if(strs == null || strs.length < 2) {
782 logger.error("identifier is not in the form coll:id" + identifier);
783 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
784 }
785 //String name_of_site = strs[0];
786 String coll_name = strs[0];
787 String oid = strs[1];
788
789 //re-organize the request element
790 // reset the 'to' attribute
791 String verb = req.getAttribute(GSXML.TO_ATT);
792 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
793 // reset the identifier element
794 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
795 param.setAttribute(GSXML.VALUE_ATT, oid);
796
797 // TODO is this the best way to do this???? should we create a new request???
798 Element message = req.getOwnerDocument().createElement(GSXML.MESSAGE_ELEM);
799 message.appendChild(req);
800 //Now send the request to the message router to process
801 Node result_node = mr.process(message);
802 return converter.nodeToElement(result_node);
803 }
804
805
806
807
808 private void copyNamedElementfromConfig(Element to_elem, String element_name) {
809 Element original_element = (Element)GSXML.getChildByTagName(oai_config, element_name);
810 if(original_element != null) {
811 copyNode(to_elem, original_element);
812 }
813 }
814
815 private void copyNode(Element to_elem, Node original_element) {
816 to_elem.appendChild(to_elem.getOwnerDocument().importNode(original_element, true));
817
818 }
819
820 private Element doIdentify() {
821 //The validation for this verb has been done in OAIServer.validate(). So no bother here.
822 logger.info("");
823 if (this.identify_response != null) {
824 // we have already created it
825 return this.identify_response;
826 }
827 Document doc = this.converter.newDOM();
828 Element identify = doc.createElement(OAIXML.IDENTIFY);
829 //do the repository name
830 copyNamedElementfromConfig(identify, OAIXML.REPOSITORY_NAME);
831 //do the baseurl
832 copyNamedElementfromConfig(identify, OAIXML.BASE_URL);
833 //do the protocol version
834 copyNamedElementfromConfig(identify, OAIXML.PROTOCOL_VERSION);
835
836 //There can be more than one admin email according to the OAI specification
837 NodeList admin_emails = GSXML.getChildrenByTagName(oai_config, OAIXML.ADMIN_EMAIL);
838 int num_admin = 0;
839 Element from_admin_email = null;
840 if (admin_emails != null) {
841 num_admin = admin_emails.getLength();
842 }
843 for (int i=0; i<num_admin; i++) {
844 copyNode(identify, admin_emails.item(i));
845 }
846
847 //do the earliestDatestamp
848 //send request to mr to search through the earliest datestamp amongst all oai collections in the repository.
849 //ask the message router for a list of oai collections
850 //NodeList oai_coll = getOAICollectionList();
851 long earliestDatestamp = getEarliestDateStamp(collection_list);
852 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
853 Element earliestDatestamp_elem = doc.createElement(OAIXML.EARLIEST_DATESTAMP);
854 GSXML.setNodeText(earliestDatestamp_elem, earliestDatestamp_str);
855 identify.appendChild(earliestDatestamp_elem);
856
857 //do the deletedRecord
858 copyNamedElementfromConfig(identify, OAIXML.DELETED_RECORD);
859 //do the granularity
860 copyNamedElementfromConfig(identify, OAIXML.GRANULARITY);
861
862 // output the oai identifier
863 Element description = doc.createElement(OAIXML.DESCRIPTION);
864 identify.appendChild(description);
865 // TODO, make this a valid id
866 Element oaiIdentifier = OAIXML.createOAIIdentifierXML(doc, repository_id, "lucene-jdbm-demo", "ec159e");
867 description.appendChild(oaiIdentifier);
868
869 // if there are any oaiInfo metadata, add them in too.
870 Element info = (Element)GSXML.getChildByTagName(oai_config, OAIXML.OAI_INFO);
871 if (info != null) {
872 NodeList meta = GSXML.getChildrenByTagName(info, OAIXML.METADATA);
873 if (meta != null && meta.getLength() > 0) {
874 Element gsdl = OAIXML.createGSDLElement(doc);
875 description.appendChild(gsdl);
876 for (int m = 0; m<meta.getLength(); m++) {
877 copyNode(gsdl, meta.item(m));
878 }
879
880 }
881 }
882 this.identify_response = identify;
883 return getMessage(doc, identify);
884 }
885 //split setSpec (site_name:coll_name) into an array of strings
886 //It has already been checked that the set_spec contains at least one ':'
887 private String[] splitSetSpec(String set_spec) {
888 logger.info(set_spec);
889 String[] strs = new String[2];
890 int colon_index = set_spec.indexOf(":");
891 strs[0] = set_spec.substring(0, colon_index);
892 strs[1] = set_spec.substring(colon_index + 1);
893 return strs;
894 }
895 /** split the identifier into <collection + OID> as an array
896 It has already been checked that the 'identifier' contains at least one ':'
897 */
898 private String[] splitNames(String identifier) {
899 logger.info(identifier);
900 String [] strs = new String[2];
901 int first_colon = identifier.indexOf(":");
902 if(first_colon == -1) {
903 return null;
904 }
905 strs[0] = identifier.substring(0, first_colon);
906 strs[1] = identifier.substring(first_colon + 1);
907 return strs;
908 }
909 /** validate if the specified metadata prefix value is supported by the repository
910 * by checking it in the OAIConfig.xml
911 */
912 private boolean repositorySupportsMetadataPrefix(String prefix_value) {
913 NodeList prefix_list = oai_config.getElementsByTagName(OAIXML.METADATA_PREFIX);
914
915 for(int i=0; i<prefix_list.getLength(); i++) {
916 if(prefix_value.equals(GSXML.getNodeText((Element)prefix_list.item(i)).trim() )) {
917 return true;
918 }
919 }
920 return false;
921 }
922 private Element doGetRecord(Element req){
923 logger.info("");
924 /** arguments:
925 identifier: required
926 metadataPrefix: required
927 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
928 */
929 Document doc = this.converter.newDOM();
930 Element get_record = doc.createElement(OAIXML.GET_RECORD);
931
932 HashSet<String> valid_strs = new HashSet<String>();
933 valid_strs.add(OAIXML.IDENTIFIER);
934 valid_strs.add(OAIXML.METADATA_PREFIX);
935
936 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
937 HashMap<String, String> param_map = GSXML.getParamMap(params);
938
939 if(!areAllParamsValid(param_map, valid_strs) ||
940 params.getLength() == 0 ||
941 param_map.containsKey(OAIXML.IDENTIFIER) == false ||
942 param_map.containsKey(OAIXML.METADATA_PREFIX) == false ) {
943 logger.error("must have the metadataPrefix/identifier parameter.");
944 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
945 }
946
947 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
948 String identifier = param_map.get(OAIXML.IDENTIFIER);
949
950 // verify the metadata prefix
951 if (repositorySupportsMetadataPrefix(prefix) == false) {
952 logger.error("requested prefix is not found in OAIConfig.xml");
953 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
954 }
955
956 // get the names
957 String[] strs = splitNames(identifier);
958 if(strs == null || strs.length < 2) {
959 logger.error("identifier is not in the form coll:id" + identifier);
960 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
961 }
962 //String name_of_site = strs[0];
963 String coll_name = strs[0];
964 String oid = strs[1];
965
966 //re-organize the request element
967 // reset the 'to' attribute
968 String verb = req.getAttribute(GSXML.TO_ATT);
969 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
970 // reset the identifier element
971 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.IDENTIFIER);
972 if (param != null) {
973 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
974 param.setAttribute(GSXML.VALUE_ATT, oid);
975 }
976
977 //Now send the request to the message router to process
978 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
979 msg.appendChild(doc.importNode(req, true));
980 Node result_node = mr.process(msg);
981 return converter.nodeToElement(result_node);
982 }
983
984 // See OAIConfig.xml
985 // dynamically works out what the earliestDateStamp is, since it varies by collection
986 // returns this time in *milliseconds*.
987 protected long getEarliestDateStamp(NodeList oai_coll) {
988 //do the earliestDatestamp
989 long earliestDatestamp = System.currentTimeMillis();
990 int oai_coll_size = oai_coll.getLength();
991 if (oai_coll_size == 0) {
992 logger.info("returned oai collection list is empty. Setting repository earliestDatestamp to be 1970-01-01.");
993 earliestDatestamp = 0;
994 }
995 // the earliestDatestamp is now stored as a metadata element in the collection's buildConfig.xml file
996 // we get the earliestDatestamp among the collections
997 for(int i=0; i<oai_coll_size; i++) {
998 long coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.EARLIEST_DATESTAMP));
999 earliestDatestamp = (earliestDatestamp > coll_earliestDatestamp)? coll_earliestDatestamp : earliestDatestamp;
1000 }
1001
1002 return earliestDatestamp*1000; // converting from seconds to milliseconds
1003 }
1004}
1005
1006
Note: See TracBrowser for help on using the repository browser.