source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/core/OAIReceptionist.java@ 28881

Last change on this file since 28881 was 28881, checked in by kjdon, 10 years ago

Added getCollectionListForSet method to avoid duplicating code. Added collectionsChangedSinceTime method to check whether any collections have changed since a resumption token was issued - if so, then we need to expire the token.

File size: 42.1 KB
Line 
1/*
2 * OAIReceptionist.java
3 * Copyright (C) 2012 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20package org.greenstone.gsdl3.core;
21
22import org.greenstone.gsdl3.util.*;
23import org.greenstone.gsdl3.action.*;
24// XML classes
25import org.w3c.dom.Node;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29
30// other java classes
31import java.io.File;
32import java.util.*;
33
34import org.apache.log4j.*;
35
/**
 * A Receptionist used for generating OAI metadata response XML.
 * This receptionist talks to the message router directly,
 * instead of via any action, hence no action map is needed.
 * @see Receptionist
 */
41public class OAIReceptionist implements ModuleInterface {
42
43 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.core.OAIReceptionist.class.getName());
44
45 /** Instead of a config_params object, only a site_name is needed by oai receptionist. */
46 protected String site_name = null;
47 /** The unique repository identifier */
48 protected String repository_id = null;
49
50 /** a converter class to parse XML and create Docs */
51 protected XMLConverter converter=null;
52
53 /** the configure file of this receptionist passed from the oai servlet. */
54 protected Element oai_config = null;
55
56 /** contained in the OAIConfig.xml deciding whether the resumptionToken should be in use */
57 protected int resume_after = -1 ;
58
59 /** the message router that the Receptionist and Actions will talk to */
60 protected ModuleInterface mr = null;
61
62 // Some of the data/responses will not change while the servlet is running, so
63 // we can cache them
64
65 /** A list of all the collections available to this OAI server */
66 protected Element collection_list = null;
67 /** a vector of the names, for convenience */
68 protected Vector<String> collection_name_list = null;
69 /** If this is true, then there are no OAI enabled collections, so can always return noRecordsMatch (after validating the request params) */
70 protected boolean noRecordsMatch = false;
71
72 /** A set of all known 'sets' */
73 protected HashSet<String> set_set = null;
74
75 protected boolean has_super_colls = false;
76 /** a hash of super set-> collection list */
77 protected HashMap<String, Vector<String>> super_coll_map = null;
78 /** The identify response */
79 protected Element identify_response = null;
80 /** The list set response */
81 protected Element listsets_response = null;
82 /** the list metadata formats response */
83 protected Element listmetadataformats_response = null;
84
/** Creates a receptionist that owns a fresh XML converter. */
public OAIReceptionist() {
    converter = new XMLConverter();
}
88
89 public void cleanUp() {
90 if (this.mr != null) {
91
92 this.mr.cleanUp();
93 }
94 OAIResumptionToken.saveTokensToFile();
95 }
96
97 public void setSiteName(String site_name) {
98 this.site_name = site_name;
99 }
100 /** sets the message router - it should already be created and
101 * configured in the init() of a servlet (OAIServer, for example) before being passed to the receptionist*/
102 public void setMessageRouter(ModuleInterface mr) {
103 this.mr = mr;
104 }
105
106 /** configures the receptionist */
107 public boolean configure(Element config) {
108
109 if (this.mr==null) {
110 logger.error(" message routers must be set before calling oai configure");
111 return false;
112 }
113 if (config == null) {
114 logger.error(" oai configure file is null");
115 return false;
116 }
117 oai_config = config;
118 resume_after = getResumeAfter();
119
120 repository_id = getRepositoryIdentifier();
121 if (!configureSetInfo()) {
122 // there are no sets
123 logger.error("No sets (collections) available for OAI");
124 return false;
125 }
126
127 //clear out expired resumption tokens stored in OAIResumptionToken.xml
128 OAIResumptionToken.init();
129 OAIResumptionToken.clearExpiredTokens();
130
131 return true;
132 }
133
134 // assuming that sets are static. If collections change then the servlet
135 // should be restarted.
136 private boolean configureSetInfo() {
137 // do we have any super colls listed in web/WEB-INF/classes/OAIConfig.xml?
138 // Will be like
139 // <oaiSuperSet>
140 // <SetSpec>xxx</SetSpec>
141 // <setName>xxx</SetName>
142 // <SetDescription>xxx</setDescription>
143 // </oaiSuperSet>
144 // The super set is listed in OAIConfig, and collections themselves state
145 // whether they are part of the super set or not.
146 NodeList super_coll_list = this.oai_config.getElementsByTagName(OAIXML.OAI_SUPER_SET);
147 HashMap<String, Element> super_coll_data = new HashMap<String, Element>();
148 if (super_coll_list.getLength() > 0) {
149 this.has_super_colls = true;
150 for (int i=0; i<super_coll_list.getLength(); i++) {
151 Element super_coll = (Element)super_coll_list.item(i);
152 Element set_spec = (Element)GSXML.getChildByTagName(super_coll, OAIXML.SET_SPEC);
153 if (set_spec != null) {
154 String name = GSXML.getNodeText(set_spec);
155 if (!name.equals("")) {
156 super_coll_data.put(name, super_coll);
157 logger.error("adding in super coll "+name);
158 }
159 }
160 }
161
162 if (super_coll_data.size()==0) {
163 this.has_super_colls = false;
164 }
165 }
166 if (this.has_super_colls == true) {
167 this.super_coll_map = new HashMap<String, Vector<String>>();
168 }
169 this.set_set = new HashSet<String>();
170
171 // next, we get a list of all the OAI enabled collections
172 // We get this by sending a listSets request to the MR
173 Document doc = this.converter.newDOM();
174 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
175
176 Element request = GSXML.createBasicRequest(doc, OAIXML.OAI_SET_LIST, "", null);
177 message.appendChild(request);
178 Node msg_node = mr.process(message);
179
180 if (msg_node == null) {
181 logger.error("returned msg_node from mr is null");
182 return false;
183 }
184 Element resp = (Element)GSXML.getChildByTagName(msg_node, GSXML.RESPONSE_ELEM);
185 Element coll_list = (Element)GSXML.getChildByTagName(resp, GSXML.COLLECTION_ELEM + GSXML.LIST_MODIFIER);
186 if (coll_list == null) {
187 logger.error("coll_list is null");
188 return false;
189 }
190
191 this.collection_list = (Element)doc.importNode(coll_list, true);
192
193 // go through and store a list of collection names for convenience
194 // also create a 'to' attribute
195 Node child = this.collection_list.getFirstChild();
196 if (child == null) {
197 logger.error("collection list has no children");
198 noRecordsMatch = true;
199 return false;
200 }
201
202 this.collection_name_list = new Vector<String>();
203 StringBuffer to = new StringBuffer();
204 boolean first = true;
205 while (child != null) {
206 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
207 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
208 this.collection_name_list.add(coll_id);
209 if (!first) {
210 to.append(',');
211 }
212 first = false;
213 to.append(coll_id+"/"+OAIXML.LIST_SETS);
214 }
215 child = child.getNextSibling();
216 }
217 if (first) {
218 // we haven't found any collections
219 logger.error("found no collection elements in collectionList");
220 noRecordsMatch = true;
221 return false;
222 }
223 Document listsets_doc = this.converter.newDOM();
224 Element listsets_element = listsets_doc.createElement(OAIXML.LIST_SETS);
225 this.listsets_response = getMessage(listsets_doc, listsets_element);
226
227 // Now, for each collection, get a list of all its sets
228 // might include subsets (classifiers) or super colls
229 // We'll reuse the first message, changing its type and to atts
230 request.setAttribute(GSXML.TYPE_ATT, "");
231 request.setAttribute(GSXML.TO_ATT, to.toString());
232 // send to MR
233 msg_node = mr.process(message);
234 logger.error(this.converter.getPrettyString(msg_node));
235 NodeList response_list = ((Element)msg_node).getElementsByTagName(GSXML.RESPONSE_ELEM);
236 for (int c=0; c<response_list.getLength(); c++) {
237 // for each collection's response
238 Element response = (Element)response_list.item(c);
239 String coll_name = GSPath.getFirstLink(response.getAttribute(GSXML.FROM_ATT));
240 logger.error("coll from response "+coll_name);
241 NodeList set_list = response.getElementsByTagName(OAIXML.SET);
242 for (int j=0; j<set_list.getLength(); j++) {
243 // now check if it a super collection
244 Element set = (Element)set_list.item(j);
245 String set_spec = GSXML.getNodeText((Element)GSXML.getChildByTagName(set, OAIXML.SET_SPEC));
246 logger.error("set spec = "+set_spec);
247 // this may change if we add site name back in
248 // setSpecs will be collname or collname:subset or supercollname
249 if (set_spec.indexOf(":")==-1 && ! set_spec.equals(coll_name)) {
250 // it must be a super coll spec
251 logger.error("found super coll, "+set_spec);
252 // check that it is a valid one from config
253 if (this.has_super_colls == true && super_coll_data.containsKey(set_spec)) {
254 Vector <String> subcolls = this.super_coll_map.get(set_spec);
255 if (subcolls == null) {
256 logger.error("its new!!");
257 // not in there yet
258 subcolls = new Vector<String>();
259 this.set_set.add(set_spec);
260 this.super_coll_map.put(set_spec, subcolls);
261 // the first time a supercoll is mentioned, add into the set list
262 logger.error("finding the set info "+this.converter.getPrettyString(super_coll_data.get(set_spec)));
263 listsets_element.appendChild(GSXML.duplicateWithNewName(listsets_doc, super_coll_data.get(set_spec), OAIXML.SET, true));
264 }
265 // add this collection to the list for the super coll
266 subcolls.add(coll_name);
267 }
268 } else { // its either the coll itself or a subcoll
269 // add in the set
270 listsets_element.appendChild(listsets_doc.importNode(set, true));
271 this.set_set.add(set_spec);
272 }
273 } // for each set in the collection
274 } // for each OAI enabled collection
275 return true;
276 }
277
278 /** process using strings - just calls process using Elements */
279 public String process(String xml_in) {
280
281 Node message_node = this.converter.getDOM(xml_in);
282 Node page = process(message_node);
283 return this.converter.getString(page);
284 }
285
286 //Compose a message/response element used to send back to the OAIServer servlet.
287 //This method is only used within OAIReceptionist
288 private Element getMessage(Document doc, Element e) {
289 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
290 Element response = doc.createElement(GSXML.RESPONSE_ELEM);
291 msg.appendChild(response);
292 response.appendChild(e);
293 return msg;
294 }
295
296 /** process - produce xml data in response to a request
297 * if something goes wrong, it returns null -
298 */
299 public Node process(Node message_node) {
300 logger.error("OAIReceptionist received request");
301
302 Element message = this.converter.nodeToElement(message_node);
303 logger.error(this.converter.getString(message));
304
305 // check that its a correct message tag
306 if (!message.getTagName().equals(GSXML.MESSAGE_ELEM)) {
307 logger.error(" Invalid message. GSDL message should start with <"+GSXML.MESSAGE_ELEM+">, instead it starts with:"+message.getTagName()+".");
308 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
309 }
310
311 // get the request out of the message - assume that there is only one
312 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
313 if (request == null) {
314 logger.error(" message had no request!");
315 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
316 }
317 //At this stage, the value of 'to' attribute of the request must be the 'verb'
318 //The only thing that the oai receptionist can be sure is that these verbs are valid, nothing else.
319 String verb = request.getAttribute(GSXML.TO_ATT);
320 if (verb.equals(OAIXML.IDENTIFY)) {
321 return doIdentify();
322 }
323 if (verb.equals(OAIXML.LIST_METADATA_FORMATS)) {
324 return doListMetadataFormats(request);
325 }
326 if (verb.equals(OAIXML.LIST_SETS)) {
327 // we have composed the list sets response on init
328 // Note this means that list sets never uses resumption tokens
329 return this.listsets_response;
330 }
331 if (verb.equals(OAIXML.GET_RECORD)) {
332 return doGetRecord(request);
333 }
334 if (verb.equals(OAIXML.LIST_IDENTIFIERS)) {
335 return doListIdentifiersOrRecords(request,OAIXML.LIST_IDENTIFIERS , OAIXML.HEADER);
336 }
337 if (verb.equals(OAIXML.LIST_RECORDS)) {
338 return doListIdentifiersOrRecords(request, OAIXML.LIST_RECORDS, OAIXML.RECORD);
339 }
340 // should never get here as verbs were checked in OAIServer
341 return OAIXML.createErrorMessage(OAIXML.BAD_VERB, "Unexpected things happened");
342
343 }
344
345
346 private int getResumeAfter() {
347 Element resume_after = (Element)GSXML.getChildByTagName(oai_config, OAIXML.RESUME_AFTER);
348 if(resume_after != null) return Integer.parseInt(GSXML.getNodeText(resume_after));
349 return -1;
350 }
351 private String getRepositoryIdentifier() {
352 Element ri = (Element)GSXML.getChildByTagName(oai_config, OAIXML.REPOSITORY_IDENTIFIER);
353 if (ri != null) {
354 return GSXML.getNodeText(ri);
355 }
356 return "";
357 }
358
359
360 /** if the param_map contains strings other than those in valid_strs, return false;
361 * otherwise true.
362 */
363 private boolean areAllParamsValid(HashMap<String, String> param_map, HashSet<String> valid_strs) {
364 ArrayList<String> param_list = new ArrayList<String>(param_map.keySet());
365 for(int i=0; i<param_list.size(); i++) {
366 logger.error("param, key = "+param_list.get(i)+", value = "+param_map.get(param_list.get(i)));
367 if (valid_strs.contains(param_list.get(i)) == false) {
368 return false;
369 }
370 }
371 return true;
372 }
373
374 private Element doListIdentifiersOrRecords(Element req, String verb, String record_type) {
375 // options: from, until, set, metadataPrefix, resumptionToken
376 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
377 HashSet<String> valid_strs = new HashSet<String>();
378 valid_strs.add(OAIXML.FROM);
379 valid_strs.add(OAIXML.UNTIL);
380 valid_strs.add(OAIXML.SET);
381 valid_strs.add(OAIXML.METADATA_PREFIX);
382 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
383
384 Document result_doc = this.converter.newDOM();
385 Element result_element = result_doc.createElement(verb);
386 boolean result_token_needed = false; // does this result need to include a
387 // resumption token
388
389 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
390
391 HashMap<String, String> param_map = GSXML.getParamMap(params);
392
393 // are all the params valid?
394 if (!areAllParamsValid(param_map, valid_strs)) {
395 logger.error("One of the params is invalid");
396 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "There was an invalid parameter");
397 // TODO, need to tell the user which one was invalid ??
398 }
399
400 // Do we have a resumption token??
401 String token = null;
402 String from = null;
403 String until = null;
404 boolean set_requested = false;
405 String set_spec_str = null;
406 String prefix_value = null;
407 int cursor = 0;
408 int current_cursor = 0;
409 String current_set = null;
410 long initial_time = 0;
411
412 int total_size = -1; // we are only going to set this in resumption
413 // token if it is easy to work out, i.e. not sending extra requests to
414 // MR just to calculate total size
415
416 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
417 // Is it an error to have other arguments? Do we need to check to make sure that resumptionToken is the only arg??
418 // validate resumptionToken
419 token = param_map.get(OAIXML.RESUMPTION_TOKEN);
420 logger.info("has resumptionToken " + token);
421 if(OAIResumptionToken.isValidToken(token) == false) {
422 logger.error("token is not valid");
423 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "");
424 }
425 result_token_needed = true; // we always need to send a token back if we have started with one. It may be empty if we are returning the end of the list
426 // initialise the request params from the stored token data
427 HashMap<String, String> token_data = OAIResumptionToken.getTokenData(token);
428 from = token_data.get(OAIXML.FROM);
429 until = token_data.get(OAIXML.UNTIL);
430 set_spec_str = token_data.get(OAIXML.SET);
431 if (set_spec_str != null) {
432 set_requested = true;
433 }
434 prefix_value = token_data.get(OAIXML.METADATA_PREFIX);
435 current_set = token_data.get(OAIResumptionToken.CURRENT_SET);
436 try {
437 cursor = Integer.parseInt(token_data.get(OAIXML.CURSOR));
438 cursor = cursor + resume_after; // increment cursor
439 current_cursor = Integer.parseInt(token_data.get(OAIResumptionToken.CURRENT_CURSOR));
440 initial_time = Long.parseLong(token_data.get(OAIResumptionToken.INITIAL_TIME));
441 } catch (NumberFormatException e) {
442 logger.error("tried to parse int from cursor data and failed");
443 }
444
445 // check that the collections/sets haven't changed since the token was issued
446 if (collectionsChangedSinceTime(set_spec_str, initial_time)) {
447 logger.error("one of the collections in set "+set_spec_str+" has changed since token issued. Expiring the token");
448 OAIResumptionToken.expireToken(token);
449 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "Repository data has changed since this token was issued. Resend original request");
450 }
451 }
452 else {
453 // no resumption token, lets check the other params
454 // there must be a metadataPrefix
455 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
456 logger.error("metadataPrefix param required");
457 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "metadataPrefix param required");
458 }
459
460 //if there are any date params, check they're of the right format
461 from = param_map.get(OAIXML.FROM);
462 if(from != null) {
463 Date from_date = OAIXML.getDate(from);
464 if(from_date == null) {
465 logger.error("invalid date: " + from);
466 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.FROM);
467 }
468 }
469 until = param_map.get(OAIXML.UNTIL);
470 if(until != null) {
471 Date until_date = OAIXML.getDate(until);
472 if(until_date == null) {
473 logger.error("invalid date: " + until);
474 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.UNTIL);
475 }
476 }
477 if(from != null && until != null) { // check they are of the same date-time format (granularity)
478 if(from.length() != until.length()) {
479 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
480 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "The request has different granularities (date-time formats) for the From and Until date parameters.");
481 }
482 }
483
484 // check the set arg is a set we know about
485 set_requested = param_map.containsKey(OAIXML.SET);
486 set_spec_str = null;
487 if(set_requested == true) {
488 set_spec_str = param_map.get(OAIXML.SET);
489 if (!this.set_set.contains(set_spec_str)) {
490 // the set is not one we know about
491 logger.error("requested set is not found in this repository");
492 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid set parameter");
493
494 }
495 }
496 // Is the metadataPrefix arg one this repository supports?
497 prefix_value = param_map.get(OAIXML.METADATA_PREFIX);
498 if (repositorySupportsMetadataPrefix(prefix_value) == false) {
499 logger.error("requested metadataPrefix is not found in OAIConfig.xml");
500 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "metadata format "+prefix_value+" not supported by this repository");
501 }
502
503 } // else no resumption token, check other params
504
505 // Whew. Now we have validated the params, we can work on doing the actual
506 // request
507
508
509 Document doc = this.converter.newDOM();
510 Element mr_msg = doc.createElement(GSXML.MESSAGE_ELEM);
511 Element mr_req = doc.createElement(GSXML.REQUEST_ELEM);
512 // TODO does this need a type???
513 mr_msg.appendChild(mr_req);
514
515 // copy in the from/until params if there
516 if (from != null) {
517 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.FROM, from));
518 }
519 if (until != null) {
520 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.UNTIL, until));
521 }
522 // add metadataPrefix
523 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.METADATA_PREFIX, prefix_value));
524
525 // do we have a set???
526 // if no set, we send to all collections in the collection list
527 // if super set, we send to all collections in super set list
528 // if a single collection, send to it
529 // if a subset, send to the collection
530 Vector<String> current_coll_list = getCollectionListForSet(set_spec_str);
531 boolean single_collection = false;
532 if (current_coll_list.size() == 1) {
533 single_collection = true;
534 }
535 if (set_spec_str != null && set_spec_str.indexOf(":") != -1) {
536 // we have a subset - add the set param back in
537 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.SET, set_spec_str));
538 }
539
540 int num_collected_records = 0;
541 int start_point = current_cursor; // may not be 0 if we are using a resumption token
542 String resumption_collection = "";
543 boolean empty_result_token = false; // if we are sending the last part of a list, then the token value will be empty
544
545 // iterate through the list of collections and send the request to each
546
547 int start_coll=0;
548 if (current_set != null) {
549 // we are resuming a previous request, need to locate the first collection
550 for (int i=0; i<current_coll_list.size(); i++) {
551 if (current_set.equals(current_coll_list.get(i))) {
552 start_coll = i;
553 break;
554 }
555 }
556 }
557
558 for (int i=start_coll; i<current_coll_list.size(); i++) {
559 String current_coll = current_coll_list.get(i);
560 mr_req.setAttribute(GSXML.TO_ATT, current_coll+"/"+verb);
561
562 Element result = (Element)mr.process(mr_msg);
563 logger.error(verb+ " result for coll "+current_coll);
564 logger.error(this.converter.getPrettyString(result));
565 if (result == null) {
566 logger.info("message router returns null");
567 // do what??? carry on? fail??
568 return OAIXML.createErrorMessage("Internal service returns null", "");
569 }
570 Element res = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
571 if(res == null) {
572 logger.info("response element in xml_result is null");
573 return OAIXML.createErrorMessage("Internal service returns null", "");
574 }
575 NodeList record_list = res.getElementsByTagName(record_type);
576 int num_records = record_list.getLength();
577 if(num_records == 0) {
578 logger.info("message router returns 0 records for coll "+current_coll);
579 continue; // try the next collection
580 }
581 if (single_collection) {
582 total_size = num_records;
583 }
584 int records_to_add = (resume_after > 0 ? resume_after - num_collected_records : num_records);
585 if (records_to_add > (num_records-start_point)) {
586 records_to_add = num_records-start_point;
587 }
588 addRecordsToList(result_doc, result_element, record_list, start_point, records_to_add);
589 num_collected_records += records_to_add;
590
591 // do we need to stop here, and do we need to issue a resumption token?
592 if (resume_after > 0 && num_collected_records == resume_after) {
593 // we have finished collecting records at the moment.
594 // but are we conincidentally at the end? or are there more to go?
595 if (records_to_add < (num_records - start_point)) {
596 // we have added less than this collection had
597 start_point += records_to_add;
598 resumption_collection = current_coll;
599 result_token_needed = true;
600 }
601 else {
602 // we added all this collection had to offer
603 // is there another collection in the list??
604 if (i<current_coll_list.size()-1) {
605 result_token_needed = true;
606 start_point = 0;
607 resumption_collection = current_coll_list.get(i+1);
608 }
609 else {
610 // we have finished one collection and there are no more collection
611 // if we need to send a resumption token (in this case, only because we started with one, then it will be empty
612 logger.error("at end of list, need empty result token");
613 empty_result_token = true;
614 }
615 }
616 break;
617 }
618 start_point = 0; // only the first one will have start non-zero, if we
619 // have a resumption token
620
621 } // for each collection
622
623 if (num_collected_records ==0) {
624 // there were no matching results
625 return OAIXML.createErrorMessage(OAIXML.NO_RECORDS_MATCH, "");
626 }
627
628 if (num_collected_records < resume_after) {
629 // we have been through all collections, and there are no more
630 // if we need a result token - only because we started with one, so we need to send an empty one, then make sure everyone knows we are just sending an empty one
631 if (result_token_needed) {
632 empty_result_token = true;
633 }
634 }
635
636 if (result_token_needed) {
637 // we need a resumption token
638 if (empty_result_token) {
639 logger.error("have empty result token");
640 token = "";
641 } else {
642 if (token != null) {
643 // we had a token for this request, we can just update it
644 token = OAIResumptionToken.updateToken(token, ""+cursor, resumption_collection, ""+start_point);
645 } else {
646 // we are generating a new one
647 token = OAIResumptionToken.createAndStoreResumptionToken(set_spec_str, prefix_value, from, until, ""+cursor, resumption_collection, ""+start_point );
648 }
649 }
650
651 // result token XML
652 long expiration_date = -1;
653 if (empty_result_token) {
654 // we know how many records in total as we have sent them all
655 total_size = cursor+num_collected_records;
656 } else {
657 // non-empty token, set the expiration date
658 expiration_date = OAIResumptionToken.getExpirationDate(token);
659 }
660 Element token_elem = OAIXML.createResumptionTokenElement(result_doc, token, total_size, cursor, expiration_date);
661 // OAIXML.addToken(token_elem); // store it
662 result_element.appendChild(token_elem); // add to the result
663 }
664
665
666 return getMessage(result_doc, result_element);
667 }
668
669 private Vector<String> getCollectionListForSet(String set) {
670 if (set == null) {
671 // no set requested, need the complete collection list
672 return this.collection_name_list;
673 }
674 if (has_super_colls && super_coll_map.containsKey(set)) {
675 return super_coll_map.get(set);
676 }
677 //********************8
678 Vector<String> coll_list = new Vector<String>();
679 if (set.indexOf(":") != -1) {
680 String col_name = set.substring(0, set.indexOf(":"));
681 coll_list.add(col_name);
682 }
683 else {
684 coll_list.add(set);
685 }
686 return coll_list;
687 }
688 private void addRecordsToList(Document doc, Element result_element, NodeList
689 record_list, int start_point, int num_records) {
690 int end_point = start_point + num_records;
691 for (int i=start_point; i<end_point; i++) {
692 result_element.appendChild(doc.importNode(record_list.item(i), true));
693 }
694 }
695
696
697 // method exclusively used by doListRecords/doListIdentifiers
698 private void getRecords(Element verb_elem, NodeList list, int start_point, int end_point) {
699 for (int i=start_point; i<end_point; i++) {
700 verb_elem.appendChild(verb_elem.getOwnerDocument().importNode(list.item(i), true));
701 }
702 }
703 private Element collectAll(Element result, Element msg, String verb, String elem_name) {
704 if(result == null) {
705 //in the first round, result is null
706 return msg;
707 }
708 Element res_in_result = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
709 if(res_in_result == null) { // return the results of all other collections accumulated so far
710 return msg;
711 }
712 Element verb_elem = (Element)GSXML.getChildByTagName(res_in_result, verb);
713 if(msg == null) {
714 return result;
715 }
716
717 //e.g., get all <record> elements from the returned message. There may be none of
718 //such element, for example, the collection service returned an error message
719 NodeList elem_list = msg.getElementsByTagName(elem_name);
720
721 for (int i=0; i<elem_list.getLength(); i++) {
722 verb_elem.appendChild(res_in_result.getOwnerDocument().importNode(elem_list.item(i), true));
723 }
724 return result;
725 }
726
727
728 /** there are three possible exception conditions: bad argument, idDoesNotExist, and noMetadataFormat.
729 * The first one is handled here, and the last two are processed by OAIPMH.
730 */
731 private Element doListMetadataFormats(Element req) {
732 //if the verb is ListMetadataFormats, there could be only one parameter: identifier
733 //, or there is no parameter; otherwise it is an error
734 //logger.info("" + this.converter.getString(msg));
735
736 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
737 Element param = null;
738 Document lmf_doc = this.converter.newDOM();
739 if(params.getLength() == 0) {
740 //this is requesting metadata formats for the whole repository
741 //read the oaiConfig.xml file, return the metadata formats specified there.
742 if (this.listmetadataformats_response != null) {
743 // we have already created it
744 return this.listmetadataformats_response;
745 }
746
747 Element list_metadata_formats = lmf_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
748
749 Element format_list = (Element)GSXML.getChildByTagName(oai_config, OAIXML.LIST_METADATA_FORMATS);
750 if(format_list == null) {
751 logger.error("OAIConfig.xml must contain the supported metadata formats");
752 // TODO this is internal error, what to do???
753 return getMessage(lmf_doc, list_metadata_formats);
754 }
755 NodeList formats = format_list.getElementsByTagName(OAIXML.METADATA_FORMAT);
756 for(int i=0; i<formats.getLength(); i++) {
757 Element meta_fmt = lmf_doc.createElement(OAIXML.METADATA_FORMAT);
758 Element first_meta_format = (Element)formats.item(i);
759 //the element also contains mappings, but we don't want them
760 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_PREFIX), true));
761 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.SCHEMA), true));
762 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_NAMESPACE), true));
763 list_metadata_formats.appendChild(meta_fmt);
764 }
765 return getMessage(lmf_doc, list_metadata_formats);
766
767
768 }
769
770 if (params.getLength() > 1) {
771 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
772 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
773 }
774
775 // This is a request for the metadata of a particular item with an identifier
776 /**the request xml is in the form: <request>
777 * <param name=.../>
778 * </request>
779 *And there is a param element and one element only. (No paramList element in between).
780 */
781 param = (Element)params.item(0);
782 String param_name = param.getAttribute(GSXML.NAME_ATT);
783 String identifier = "";
784 if (!param_name.equals(OAIXML.IDENTIFIER)) {
785 //Bad argument
786 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
787 }
788
789 identifier = param.getAttribute(GSXML.VALUE_ATT);
790 // the identifier is in the form: <coll_name>:<OID>
791 // so it must contain at least two ':' characters
792 String[] strs = identifier.split(":");
793 if(strs == null || strs.length < 2) {
794 // the OID may also contain ':'
795 logger.error("identifier is not in the form coll:id" + identifier);
796 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
797 }
798
799 // send request to message router
800 // get the names
801 strs = splitNames(identifier);
802 if(strs == null || strs.length < 2) {
803 logger.error("identifier is not in the form coll:id" + identifier);
804 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
805 }
806 //String name_of_site = strs[0];
807 String coll_name = strs[0];
808 String oid = strs[1];
809
810 //re-organize the request element
811 // reset the 'to' attribute
812 String verb = req.getAttribute(GSXML.TO_ATT);
813 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
814 // reset the identifier element
815 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
816 param.setAttribute(GSXML.VALUE_ATT, oid);
817
818 // TODO is this the best way to do this???? should we create a new request???
819 Element message = req.getOwnerDocument().createElement(GSXML.MESSAGE_ELEM);
820 message.appendChild(req);
821 //Now send the request to the message router to process
822 Node result_node = mr.process(message);
823 return converter.nodeToElement(result_node);
824 }
825
826
827
828
829 private void copyNamedElementfromConfig(Element to_elem, String element_name) {
830 Element original_element = (Element)GSXML.getChildByTagName(oai_config, element_name);
831 if(original_element != null) {
832 copyNode(to_elem, original_element);
833 }
834 }
835
836 private void copyNode(Element to_elem, Node original_element) {
837 to_elem.appendChild(to_elem.getOwnerDocument().importNode(original_element, true));
838
839 }
840
841 private Element doIdentify() {
842 //The validation for this verb has been done in OAIServer.validate(). So no bother here.
843 logger.info("");
844 if (this.identify_response != null) {
845 // we have already created it
846 return this.identify_response;
847 }
848 Document doc = this.converter.newDOM();
849 Element identify = doc.createElement(OAIXML.IDENTIFY);
850 //do the repository name
851 copyNamedElementfromConfig(identify, OAIXML.REPOSITORY_NAME);
852 //do the baseurl
853 copyNamedElementfromConfig(identify, OAIXML.BASE_URL);
854 //do the protocol version
855 copyNamedElementfromConfig(identify, OAIXML.PROTOCOL_VERSION);
856
857 //There can be more than one admin email according to the OAI specification
858 NodeList admin_emails = GSXML.getChildrenByTagName(oai_config, OAIXML.ADMIN_EMAIL);
859 int num_admin = 0;
860 Element from_admin_email = null;
861 if (admin_emails != null) {
862 num_admin = admin_emails.getLength();
863 }
864 for (int i=0; i<num_admin; i++) {
865 copyNode(identify, admin_emails.item(i));
866 }
867
868 //do the earliestDatestamp
869 //send request to mr to search through the earliest datestamp amongst all oai collections in the repository.
870 //ask the message router for a list of oai collections
871 //NodeList oai_coll = getOAICollectionList();
872 long earliestDatestamp = getEarliestDateStamp(collection_list);
873 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
874 Element earliestDatestamp_elem = doc.createElement(OAIXML.EARLIEST_DATESTAMP);
875 GSXML.setNodeText(earliestDatestamp_elem, earliestDatestamp_str);
876 identify.appendChild(earliestDatestamp_elem);
877
878 //do the deletedRecord
879 copyNamedElementfromConfig(identify, OAIXML.DELETED_RECORD);
880 //do the granularity
881 copyNamedElementfromConfig(identify, OAIXML.GRANULARITY);
882
883 // output the oai identifier
884 Element description = doc.createElement(OAIXML.DESCRIPTION);
885 identify.appendChild(description);
886 // TODO, make this a valid id
887 Element oaiIdentifier = OAIXML.createOAIIdentifierXML(doc, repository_id, "lucene-jdbm-demo", "ec159e");
888 description.appendChild(oaiIdentifier);
889
890 // if there are any oaiInfo metadata, add them in too.
891 Element info = (Element)GSXML.getChildByTagName(oai_config, OAIXML.OAI_INFO);
892 if (info != null) {
893 NodeList meta = GSXML.getChildrenByTagName(info, OAIXML.METADATA);
894 if (meta != null && meta.getLength() > 0) {
895 Element gsdl = OAIXML.createGSDLElement(doc);
896 description.appendChild(gsdl);
897 for (int m = 0; m<meta.getLength(); m++) {
898 copyNode(gsdl, meta.item(m));
899 }
900
901 }
902 }
903 this.identify_response = identify;
904 return getMessage(doc, identify);
905 }
906 //split setSpec (site_name:coll_name) into an array of strings
907 //It has already been checked that the set_spec contains at least one ':'
908 private String[] splitSetSpec(String set_spec) {
909 logger.info(set_spec);
910 String[] strs = new String[2];
911 int colon_index = set_spec.indexOf(":");
912 strs[0] = set_spec.substring(0, colon_index);
913 strs[1] = set_spec.substring(colon_index + 1);
914 return strs;
915 }
916 /** split the identifier into <collection + OID> as an array
917 It has already been checked that the 'identifier' contains at least one ':'
918 */
919 private String[] splitNames(String identifier) {
920 logger.info(identifier);
921 String [] strs = new String[2];
922 int first_colon = identifier.indexOf(":");
923 if(first_colon == -1) {
924 return null;
925 }
926 strs[0] = identifier.substring(0, first_colon);
927 strs[1] = identifier.substring(first_colon + 1);
928 return strs;
929 }
930 /** validate if the specified metadata prefix value is supported by the repository
931 * by checking it in the OAIConfig.xml
932 */
933 private boolean repositorySupportsMetadataPrefix(String prefix_value) {
934 NodeList prefix_list = oai_config.getElementsByTagName(OAIXML.METADATA_PREFIX);
935
936 for(int i=0; i<prefix_list.getLength(); i++) {
937 if(prefix_value.equals(GSXML.getNodeText((Element)prefix_list.item(i)).trim() )) {
938 return true;
939 }
940 }
941 return false;
942 }
943 private Element doGetRecord(Element req){
944 logger.info("");
945 /** arguments:
946 identifier: required
947 metadataPrefix: required
948 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
949 */
950 Document doc = this.converter.newDOM();
951 Element get_record = doc.createElement(OAIXML.GET_RECORD);
952
953 HashSet<String> valid_strs = new HashSet<String>();
954 valid_strs.add(OAIXML.IDENTIFIER);
955 valid_strs.add(OAIXML.METADATA_PREFIX);
956
957 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
958 HashMap<String, String> param_map = GSXML.getParamMap(params);
959
960 if(!areAllParamsValid(param_map, valid_strs) ||
961 params.getLength() == 0 ||
962 param_map.containsKey(OAIXML.IDENTIFIER) == false ||
963 param_map.containsKey(OAIXML.METADATA_PREFIX) == false ) {
964 logger.error("must have the metadataPrefix/identifier parameter.");
965 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
966 }
967
968 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
969 String identifier = param_map.get(OAIXML.IDENTIFIER);
970
971 // verify the metadata prefix
972 if (repositorySupportsMetadataPrefix(prefix) == false) {
973 logger.error("requested prefix is not found in OAIConfig.xml");
974 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
975 }
976
977 // get the names
978 String[] strs = splitNames(identifier);
979 if(strs == null || strs.length < 2) {
980 logger.error("identifier is not in the form coll:id" + identifier);
981 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
982 }
983 //String name_of_site = strs[0];
984 String coll_name = strs[0];
985 String oid = strs[1];
986
987 //re-organize the request element
988 // reset the 'to' attribute
989 String verb = req.getAttribute(GSXML.TO_ATT);
990 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
991 // reset the identifier element
992 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.IDENTIFIER);
993 if (param != null) {
994 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
995 param.setAttribute(GSXML.VALUE_ATT, oid);
996 }
997
998 //Now send the request to the message router to process
999 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
1000 msg.appendChild(doc.importNode(req, true));
1001 Node result_node = mr.process(msg);
1002 return converter.nodeToElement(result_node);
1003 }
1004
1005 // See OAIConfig.xml
1006 // dynamically works out what the earliestDateStamp is, since it varies by collection
1007 // returns this time in *milliseconds*.
1008 protected long getEarliestDateStamp(Element oai_coll_list) {
1009 // config earliest datstamp
1010 long config_datestamp = 0;
1011 Element config_datestamp_elem = (Element)GSXML.getChildByTagName(this.oai_config, OAIXML.EARLIEST_DATESTAMP);
1012 if (config_datestamp_elem != null) {
1013 String datest = GSXML.getNodeText(config_datestamp_elem);
1014 config_datestamp = OAIXML.getTime(datest);
1015 if (config_datestamp == -1) {
1016 config_datestamp = 0;
1017 }
1018 }
1019 //do the earliestDatestamp
1020 long current_time = System.currentTimeMillis();
1021 long earliestDatestamp = current_time;
1022 NodeList oai_coll = oai_coll_list.getElementsByTagName(GSXML.COLLECTION_ELEM);
1023 int oai_coll_size = oai_coll.getLength();
1024 if (oai_coll_size == 0) {
1025 logger.info("returned oai collection list is empty. Setting repository earliestDatestamp to be the earliest datestamp from OAIConfig.xml, or 1970-01-01 if not specified.");
1026 return config_datestamp;
1027 }
1028 // the earliestDatestamp is now stored as a metadata element in the collection's buildConfig.xml file
1029 // we get the earliestDatestamp among the collections
1030 for(int i=0; i<oai_coll_size; i++) {
1031 long coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.EARLIEST_DATESTAMP));
1032 if (coll_earliestDatestamp == 0) {
1033 // try last modified
1034 coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.LAST_MODIFIED));
1035 }
1036 if (coll_earliestDatestamp > 0) {
1037 earliestDatestamp = (earliestDatestamp > coll_earliestDatestamp)? coll_earliestDatestamp : earliestDatestamp;
1038 }
1039 }
1040 if (earliestDatestamp == current_time) {
1041 logger.info("no collection had a real datestamp, using value from OAIConfig");
1042 return config_datestamp;
1043 }
1044 return earliestDatestamp;
1045 }
1046
1047 private boolean collectionsChangedSinceTime(String set_spec_str, long initial_time) {
1048
1049 // we need to look though all collections in the set to see if any have last modified dates > initial_time
1050 Vector<String> set_coll_list = getCollectionListForSet(set_spec_str);
1051
1052 Node child = this.collection_list.getFirstChild();
1053 while (child != null) {
1054 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
1055 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
1056 if (set_coll_list.contains(coll_id)) {
1057 long last_modified = Long.parseLong(((Element)child).getAttribute(OAIXML.LAST_MODIFIED));
1058 if (initial_time < last_modified) {
1059 return true;
1060 }
1061 }
1062 }
1063 child = child.getNextSibling();
1064 }
1065 return false;
1066
1067 }
1068
1069}
1070
1071
Note: See TracBrowser for help on using the repository browser.