source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/core/OAIReceptionist.java@ 28882

Last change on this file since 28882 was 28882, checked in by kjdon, 10 years ago

removed a comment

File size: 42.1 KB
Line 
1/*
2 * OAIReceptionist.java
3 * Copyright (C) 2012 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20package org.greenstone.gsdl3.core;
21
22import org.greenstone.gsdl3.util.*;
23import org.greenstone.gsdl3.action.*;
24// XML classes
25import org.w3c.dom.Node;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29
30// other java classes
31import java.io.File;
32import java.util.*;
33
34import org.apache.log4j.*;
35
36/** a Receptionist, used for oai metadata response xml generation.
37 * This receptionist talks to the message router directly,
38 * instead of via any action, hence no action map is needed.
39 * @see the basic Receptionist
40 */
41public class OAIReceptionist implements ModuleInterface {
42
43 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.core.OAIReceptionist.class.getName());
44
45 /** Instead of a config_params object, only a site_name is needed by oai receptionist. */
46 protected String site_name = null;
47 /** The unique repository identifier */
48 protected String repository_id = null;
49
50 /** a converter class to parse XML and create Docs */
51 protected XMLConverter converter=null;
52
53 /** the configure file of this receptionist passed from the oai servlet. */
54 protected Element oai_config = null;
55
56 /** contained in the OAIConfig.xml deciding whether the resumptionToken should be in use */
57 protected int resume_after = -1 ;
58
59 /** the message router that the Receptionist and Actions will talk to */
60 protected ModuleInterface mr = null;
61
62 // Some of the data/responses will not change while the servlet is running, so
63 // we can cache them
64
65 /** A list of all the collections available to this OAI server */
66 protected Element collection_list = null;
67 /** a vector of the names, for convenience */
68 protected Vector<String> collection_name_list = null;
69 /** If this is true, then there are no OAI enabled collections, so can always return noRecordsMatch (after validating the request params) */
70 protected boolean noRecordsMatch = false;
71
72 /** A set of all known 'sets' */
73 protected HashSet<String> set_set = null;
74
75 protected boolean has_super_colls = false;
76 /** a hash of super set-> collection list */
77 protected HashMap<String, Vector<String>> super_coll_map = null;
78 /** The identify response */
79 protected Element identify_response = null;
80 /** The list set response */
81 protected Element listsets_response = null;
82 /** the list metadata formats response */
83 protected Element listmetadataformats_response = null;
84
/**
 * Creates a receptionist with its own XML converter. The site name, message
 * router and configuration are supplied afterwards through the setters and
 * configure().
 */
public OAIReceptionist() {
    converter = new XMLConverter();
}
88
89 public void cleanUp() {
90 if (this.mr != null) {
91
92 this.mr.cleanUp();
93 }
94 OAIResumptionToken.saveTokensToFile();
95 }
96
97 public void setSiteName(String site_name) {
98 this.site_name = site_name;
99 }
100 /** sets the message router - it should already be created and
101 * configured in the init() of a servlet (OAIServer, for example) before being passed to the receptionist*/
102 public void setMessageRouter(ModuleInterface mr) {
103 this.mr = mr;
104 }
105
106 /** configures the receptionist */
107 public boolean configure(Element config) {
108
109 if (this.mr==null) {
110 logger.error(" message routers must be set before calling oai configure");
111 return false;
112 }
113 if (config == null) {
114 logger.error(" oai configure file is null");
115 return false;
116 }
117 oai_config = config;
118 resume_after = getResumeAfter();
119
120 repository_id = getRepositoryIdentifier();
121 if (!configureSetInfo()) {
122 // there are no sets
123 logger.error("No sets (collections) available for OAI");
124 return false;
125 }
126
127 //clear out expired resumption tokens stored in OAIResumptionToken.xml
128 OAIResumptionToken.init();
129 OAIResumptionToken.clearExpiredTokens();
130
131 return true;
132 }
133
134 // assuming that sets are static. If collections change then the servlet
135 // should be restarted.
136 private boolean configureSetInfo() {
137 // do we have any super colls listed in web/WEB-INF/classes/OAIConfig.xml?
138 // Will be like
139 // <oaiSuperSet>
140 // <SetSpec>xxx</SetSpec>
141 // <setName>xxx</SetName>
142 // <SetDescription>xxx</setDescription>
143 // </oaiSuperSet>
144 // The super set is listed in OAIConfig, and collections themselves state
145 // whether they are part of the super set or not.
146 NodeList super_coll_list = this.oai_config.getElementsByTagName(OAIXML.OAI_SUPER_SET);
147 HashMap<String, Element> super_coll_data = new HashMap<String, Element>();
148 if (super_coll_list.getLength() > 0) {
149 this.has_super_colls = true;
150 for (int i=0; i<super_coll_list.getLength(); i++) {
151 Element super_coll = (Element)super_coll_list.item(i);
152 Element set_spec = (Element)GSXML.getChildByTagName(super_coll, OAIXML.SET_SPEC);
153 if (set_spec != null) {
154 String name = GSXML.getNodeText(set_spec);
155 if (!name.equals("")) {
156 super_coll_data.put(name, super_coll);
157 logger.error("adding in super coll "+name);
158 }
159 }
160 }
161
162 if (super_coll_data.size()==0) {
163 this.has_super_colls = false;
164 }
165 }
166 if (this.has_super_colls == true) {
167 this.super_coll_map = new HashMap<String, Vector<String>>();
168 }
169 this.set_set = new HashSet<String>();
170
171 // next, we get a list of all the OAI enabled collections
172 // We get this by sending a listSets request to the MR
173 Document doc = this.converter.newDOM();
174 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
175
176 Element request = GSXML.createBasicRequest(doc, OAIXML.OAI_SET_LIST, "", null);
177 message.appendChild(request);
178 Node msg_node = mr.process(message);
179
180 if (msg_node == null) {
181 logger.error("returned msg_node from mr is null");
182 return false;
183 }
184 Element resp = (Element)GSXML.getChildByTagName(msg_node, GSXML.RESPONSE_ELEM);
185 Element coll_list = (Element)GSXML.getChildByTagName(resp, GSXML.COLLECTION_ELEM + GSXML.LIST_MODIFIER);
186 if (coll_list == null) {
187 logger.error("coll_list is null");
188 return false;
189 }
190
191 this.collection_list = (Element)doc.importNode(coll_list, true);
192
193 // go through and store a list of collection names for convenience
194 // also create a 'to' attribute
195 Node child = this.collection_list.getFirstChild();
196 if (child == null) {
197 logger.error("collection list has no children");
198 noRecordsMatch = true;
199 return false;
200 }
201
202 this.collection_name_list = new Vector<String>();
203 StringBuffer to = new StringBuffer();
204 boolean first = true;
205 while (child != null) {
206 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
207 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
208 this.collection_name_list.add(coll_id);
209 if (!first) {
210 to.append(',');
211 }
212 first = false;
213 to.append(coll_id+"/"+OAIXML.LIST_SETS);
214 }
215 child = child.getNextSibling();
216 }
217 if (first) {
218 // we haven't found any collections
219 logger.error("found no collection elements in collectionList");
220 noRecordsMatch = true;
221 return false;
222 }
223 Document listsets_doc = this.converter.newDOM();
224 Element listsets_element = listsets_doc.createElement(OAIXML.LIST_SETS);
225 this.listsets_response = getMessage(listsets_doc, listsets_element);
226
227 // Now, for each collection, get a list of all its sets
228 // might include subsets (classifiers) or super colls
229 // We'll reuse the first message, changing its type and to atts
230 request.setAttribute(GSXML.TYPE_ATT, "");
231 request.setAttribute(GSXML.TO_ATT, to.toString());
232 // send to MR
233 msg_node = mr.process(message);
234 logger.error(this.converter.getPrettyString(msg_node));
235 NodeList response_list = ((Element)msg_node).getElementsByTagName(GSXML.RESPONSE_ELEM);
236 for (int c=0; c<response_list.getLength(); c++) {
237 // for each collection's response
238 Element response = (Element)response_list.item(c);
239 String coll_name = GSPath.getFirstLink(response.getAttribute(GSXML.FROM_ATT));
240 logger.error("coll from response "+coll_name);
241 NodeList set_list = response.getElementsByTagName(OAIXML.SET);
242 for (int j=0; j<set_list.getLength(); j++) {
243 // now check if it a super collection
244 Element set = (Element)set_list.item(j);
245 String set_spec = GSXML.getNodeText((Element)GSXML.getChildByTagName(set, OAIXML.SET_SPEC));
246 logger.error("set spec = "+set_spec);
247 // this may change if we add site name back in
248 // setSpecs will be collname or collname:subset or supercollname
249 if (set_spec.indexOf(":")==-1 && ! set_spec.equals(coll_name)) {
250 // it must be a super coll spec
251 logger.error("found super coll, "+set_spec);
252 // check that it is a valid one from config
253 if (this.has_super_colls == true && super_coll_data.containsKey(set_spec)) {
254 Vector <String> subcolls = this.super_coll_map.get(set_spec);
255 if (subcolls == null) {
256 logger.error("its new!!");
257 // not in there yet
258 subcolls = new Vector<String>();
259 this.set_set.add(set_spec);
260 this.super_coll_map.put(set_spec, subcolls);
261 // the first time a supercoll is mentioned, add into the set list
262 logger.error("finding the set info "+this.converter.getPrettyString(super_coll_data.get(set_spec)));
263 listsets_element.appendChild(GSXML.duplicateWithNewName(listsets_doc, super_coll_data.get(set_spec), OAIXML.SET, true));
264 }
265 // add this collection to the list for the super coll
266 subcolls.add(coll_name);
267 }
268 } else { // its either the coll itself or a subcoll
269 // add in the set
270 listsets_element.appendChild(listsets_doc.importNode(set, true));
271 this.set_set.add(set_spec);
272 }
273 } // for each set in the collection
274 } // for each OAI enabled collection
275 return true;
276 }
277
278 /** process using strings - just calls process using Elements */
279 public String process(String xml_in) {
280
281 Node message_node = this.converter.getDOM(xml_in);
282 Node page = process(message_node);
283 return this.converter.getString(page);
284 }
285
286 //Compose a message/response element used to send back to the OAIServer servlet.
287 //This method is only used within OAIReceptionist
288 private Element getMessage(Document doc, Element e) {
289 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
290 Element response = doc.createElement(GSXML.RESPONSE_ELEM);
291 msg.appendChild(response);
292 response.appendChild(e);
293 return msg;
294 }
295
296 /** process - produce xml data in response to a request
297 * if something goes wrong, it returns null -
298 */
299 public Node process(Node message_node) {
300 logger.error("OAIReceptionist received request");
301
302 Element message = this.converter.nodeToElement(message_node);
303 logger.error(this.converter.getString(message));
304
305 // check that its a correct message tag
306 if (!message.getTagName().equals(GSXML.MESSAGE_ELEM)) {
307 logger.error(" Invalid message. GSDL message should start with <"+GSXML.MESSAGE_ELEM+">, instead it starts with:"+message.getTagName()+".");
308 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
309 }
310
311 // get the request out of the message - assume that there is only one
312 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
313 if (request == null) {
314 logger.error(" message had no request!");
315 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
316 }
317 //At this stage, the value of 'to' attribute of the request must be the 'verb'
318 //The only thing that the oai receptionist can be sure is that these verbs are valid, nothing else.
319 String verb = request.getAttribute(GSXML.TO_ATT);
320 if (verb.equals(OAIXML.IDENTIFY)) {
321 return doIdentify();
322 }
323 if (verb.equals(OAIXML.LIST_METADATA_FORMATS)) {
324 return doListMetadataFormats(request);
325 }
326 if (verb.equals(OAIXML.LIST_SETS)) {
327 // we have composed the list sets response on init
328 // Note this means that list sets never uses resumption tokens
329 return this.listsets_response;
330 }
331 if (verb.equals(OAIXML.GET_RECORD)) {
332 return doGetRecord(request);
333 }
334 if (verb.equals(OAIXML.LIST_IDENTIFIERS)) {
335 return doListIdentifiersOrRecords(request,OAIXML.LIST_IDENTIFIERS , OAIXML.HEADER);
336 }
337 if (verb.equals(OAIXML.LIST_RECORDS)) {
338 return doListIdentifiersOrRecords(request, OAIXML.LIST_RECORDS, OAIXML.RECORD);
339 }
340 // should never get here as verbs were checked in OAIServer
341 return OAIXML.createErrorMessage(OAIXML.BAD_VERB, "Unexpected things happened");
342
343 }
344
345
346 private int getResumeAfter() {
347 Element resume_after = (Element)GSXML.getChildByTagName(oai_config, OAIXML.RESUME_AFTER);
348 if(resume_after != null) return Integer.parseInt(GSXML.getNodeText(resume_after));
349 return -1;
350 }
351 private String getRepositoryIdentifier() {
352 Element ri = (Element)GSXML.getChildByTagName(oai_config, OAIXML.REPOSITORY_IDENTIFIER);
353 if (ri != null) {
354 return GSXML.getNodeText(ri);
355 }
356 return "";
357 }
358
359
360 /** if the param_map contains strings other than those in valid_strs, return false;
361 * otherwise true.
362 */
363 private boolean areAllParamsValid(HashMap<String, String> param_map, HashSet<String> valid_strs) {
364 ArrayList<String> param_list = new ArrayList<String>(param_map.keySet());
365 for(int i=0; i<param_list.size(); i++) {
366 logger.error("param, key = "+param_list.get(i)+", value = "+param_map.get(param_list.get(i)));
367 if (valid_strs.contains(param_list.get(i)) == false) {
368 return false;
369 }
370 }
371 return true;
372 }
373
374 private Element doListIdentifiersOrRecords(Element req, String verb, String record_type) {
375 // options: from, until, set, metadataPrefix, resumptionToken
376 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
377 HashSet<String> valid_strs = new HashSet<String>();
378 valid_strs.add(OAIXML.FROM);
379 valid_strs.add(OAIXML.UNTIL);
380 valid_strs.add(OAIXML.SET);
381 valid_strs.add(OAIXML.METADATA_PREFIX);
382 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
383
384 Document result_doc = this.converter.newDOM();
385 Element result_element = result_doc.createElement(verb);
386 boolean result_token_needed = false; // does this result need to include a
387 // resumption token
388
389 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
390
391 HashMap<String, String> param_map = GSXML.getParamMap(params);
392
393 // are all the params valid?
394 if (!areAllParamsValid(param_map, valid_strs)) {
395 logger.error("One of the params is invalid");
396 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "There was an invalid parameter");
397 // TODO, need to tell the user which one was invalid ??
398 }
399
400 // Do we have a resumption token??
401 String token = null;
402 String from = null;
403 String until = null;
404 boolean set_requested = false;
405 String set_spec_str = null;
406 String prefix_value = null;
407 int cursor = 0;
408 int current_cursor = 0;
409 String current_set = null;
410 long initial_time = 0;
411
412 int total_size = -1; // we are only going to set this in resumption
413 // token if it is easy to work out, i.e. not sending extra requests to
414 // MR just to calculate total size
415
416 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
417 // Is it an error to have other arguments? Do we need to check to make sure that resumptionToken is the only arg??
418 // validate resumptionToken
419 token = param_map.get(OAIXML.RESUMPTION_TOKEN);
420 logger.info("has resumptionToken " + token);
421 if(OAIResumptionToken.isValidToken(token) == false) {
422 logger.error("token is not valid");
423 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "");
424 }
425 result_token_needed = true; // we always need to send a token back if we have started with one. It may be empty if we are returning the end of the list
426 // initialise the request params from the stored token data
427 HashMap<String, String> token_data = OAIResumptionToken.getTokenData(token);
428 from = token_data.get(OAIXML.FROM);
429 until = token_data.get(OAIXML.UNTIL);
430 set_spec_str = token_data.get(OAIXML.SET);
431 if (set_spec_str != null) {
432 set_requested = true;
433 }
434 prefix_value = token_data.get(OAIXML.METADATA_PREFIX);
435 current_set = token_data.get(OAIResumptionToken.CURRENT_SET);
436 try {
437 cursor = Integer.parseInt(token_data.get(OAIXML.CURSOR));
438 cursor = cursor + resume_after; // increment cursor
439 current_cursor = Integer.parseInt(token_data.get(OAIResumptionToken.CURRENT_CURSOR));
440 initial_time = Long.parseLong(token_data.get(OAIResumptionToken.INITIAL_TIME));
441 } catch (NumberFormatException e) {
442 logger.error("tried to parse int from cursor data and failed");
443 }
444
445 // check that the collections/sets haven't changed since the token was issued
446 if (collectionsChangedSinceTime(set_spec_str, initial_time)) {
447 logger.error("one of the collections in set "+set_spec_str+" has changed since token issued. Expiring the token");
448 OAIResumptionToken.expireToken(token);
449 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "Repository data has changed since this token was issued. Resend original request");
450 }
451 }
452 else {
453 // no resumption token, lets check the other params
454 // there must be a metadataPrefix
455 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
456 logger.error("metadataPrefix param required");
457 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "metadataPrefix param required");
458 }
459
460 //if there are any date params, check they're of the right format
461 from = param_map.get(OAIXML.FROM);
462 if(from != null) {
463 Date from_date = OAIXML.getDate(from);
464 if(from_date == null) {
465 logger.error("invalid date: " + from);
466 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.FROM);
467 }
468 }
469 until = param_map.get(OAIXML.UNTIL);
470 if(until != null) {
471 Date until_date = OAIXML.getDate(until);
472 if(until_date == null) {
473 logger.error("invalid date: " + until);
474 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.UNTIL);
475 }
476 }
477 if(from != null && until != null) { // check they are of the same date-time format (granularity)
478 if(from.length() != until.length()) {
479 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
480 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "The request has different granularities (date-time formats) for the From and Until date parameters.");
481 }
482 }
483
484 // check the set arg is a set we know about
485 set_requested = param_map.containsKey(OAIXML.SET);
486 set_spec_str = null;
487 if(set_requested == true) {
488 set_spec_str = param_map.get(OAIXML.SET);
489 if (!this.set_set.contains(set_spec_str)) {
490 // the set is not one we know about
491 logger.error("requested set is not found in this repository");
492 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid set parameter");
493
494 }
495 }
496 // Is the metadataPrefix arg one this repository supports?
497 prefix_value = param_map.get(OAIXML.METADATA_PREFIX);
498 if (repositorySupportsMetadataPrefix(prefix_value) == false) {
499 logger.error("requested metadataPrefix is not found in OAIConfig.xml");
500 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "metadata format "+prefix_value+" not supported by this repository");
501 }
502
503 } // else no resumption token, check other params
504
505 // Whew. Now we have validated the params, we can work on doing the actual
506 // request
507
508
509 Document doc = this.converter.newDOM();
510 Element mr_msg = doc.createElement(GSXML.MESSAGE_ELEM);
511 Element mr_req = doc.createElement(GSXML.REQUEST_ELEM);
512 // TODO does this need a type???
513 mr_msg.appendChild(mr_req);
514
515 // copy in the from/until params if there
516 if (from != null) {
517 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.FROM, from));
518 }
519 if (until != null) {
520 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.UNTIL, until));
521 }
522 // add metadataPrefix
523 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.METADATA_PREFIX, prefix_value));
524
525 // do we have a set???
526 // if no set, we send to all collections in the collection list
527 // if super set, we send to all collections in super set list
528 // if a single collection, send to it
529 // if a subset, send to the collection
530 Vector<String> current_coll_list = getCollectionListForSet(set_spec_str);
531 boolean single_collection = false;
532 if (current_coll_list.size() == 1) {
533 single_collection = true;
534 }
535 if (set_spec_str != null && set_spec_str.indexOf(":") != -1) {
536 // we have a subset - add the set param back in
537 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.SET, set_spec_str));
538 }
539
540 int num_collected_records = 0;
541 int start_point = current_cursor; // may not be 0 if we are using a resumption token
542 String resumption_collection = "";
543 boolean empty_result_token = false; // if we are sending the last part of a list, then the token value will be empty
544
545 // iterate through the list of collections and send the request to each
546
547 int start_coll=0;
548 if (current_set != null) {
549 // we are resuming a previous request, need to locate the first collection
550 for (int i=0; i<current_coll_list.size(); i++) {
551 if (current_set.equals(current_coll_list.get(i))) {
552 start_coll = i;
553 break;
554 }
555 }
556 }
557
558 for (int i=start_coll; i<current_coll_list.size(); i++) {
559 String current_coll = current_coll_list.get(i);
560 mr_req.setAttribute(GSXML.TO_ATT, current_coll+"/"+verb);
561
562 Element result = (Element)mr.process(mr_msg);
563 logger.error(verb+ " result for coll "+current_coll);
564 logger.error(this.converter.getPrettyString(result));
565 if (result == null) {
566 logger.info("message router returns null");
567 // do what??? carry on? fail??
568 return OAIXML.createErrorMessage("Internal service returns null", "");
569 }
570 Element res = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
571 if(res == null) {
572 logger.info("response element in xml_result is null");
573 return OAIXML.createErrorMessage("Internal service returns null", "");
574 }
575 NodeList record_list = res.getElementsByTagName(record_type);
576 int num_records = record_list.getLength();
577 if(num_records == 0) {
578 logger.info("message router returns 0 records for coll "+current_coll);
579 continue; // try the next collection
580 }
581 if (single_collection) {
582 total_size = num_records;
583 }
584 int records_to_add = (resume_after > 0 ? resume_after - num_collected_records : num_records);
585 if (records_to_add > (num_records-start_point)) {
586 records_to_add = num_records-start_point;
587 }
588 addRecordsToList(result_doc, result_element, record_list, start_point, records_to_add);
589 num_collected_records += records_to_add;
590
591 // do we need to stop here, and do we need to issue a resumption token?
592 if (resume_after > 0 && num_collected_records == resume_after) {
593 // we have finished collecting records at the moment.
594 // but are we conincidentally at the end? or are there more to go?
595 if (records_to_add < (num_records - start_point)) {
596 // we have added less than this collection had
597 start_point += records_to_add;
598 resumption_collection = current_coll;
599 result_token_needed = true;
600 }
601 else {
602 // we added all this collection had to offer
603 // is there another collection in the list??
604 if (i<current_coll_list.size()-1) {
605 result_token_needed = true;
606 start_point = 0;
607 resumption_collection = current_coll_list.get(i+1);
608 }
609 else {
610 // we have finished one collection and there are no more collection
611 // if we need to send a resumption token (in this case, only because we started with one, then it will be empty
612 logger.error("at end of list, need empty result token");
613 empty_result_token = true;
614 }
615 }
616 break;
617 }
618 start_point = 0; // only the first one will have start non-zero, if we
619 // have a resumption token
620
621 } // for each collection
622
623 if (num_collected_records ==0) {
624 // there were no matching results
625 return OAIXML.createErrorMessage(OAIXML.NO_RECORDS_MATCH, "");
626 }
627
628 if (num_collected_records < resume_after) {
629 // we have been through all collections, and there are no more
630 // if we need a result token - only because we started with one, so we need to send an empty one, then make sure everyone knows we are just sending an empty one
631 if (result_token_needed) {
632 empty_result_token = true;
633 }
634 }
635
636 if (result_token_needed) {
637 // we need a resumption token
638 if (empty_result_token) {
639 logger.error("have empty result token");
640 token = "";
641 } else {
642 if (token != null) {
643 // we had a token for this request, we can just update it
644 token = OAIResumptionToken.updateToken(token, ""+cursor, resumption_collection, ""+start_point);
645 } else {
646 // we are generating a new one
647 token = OAIResumptionToken.createAndStoreResumptionToken(set_spec_str, prefix_value, from, until, ""+cursor, resumption_collection, ""+start_point );
648 }
649 }
650
651 // result token XML
652 long expiration_date = -1;
653 if (empty_result_token) {
654 // we know how many records in total as we have sent them all
655 total_size = cursor+num_collected_records;
656 } else {
657 // non-empty token, set the expiration date
658 expiration_date = OAIResumptionToken.getExpirationDate(token);
659 }
660 Element token_elem = OAIXML.createResumptionTokenElement(result_doc, token, total_size, cursor, expiration_date);
661 // OAIXML.addToken(token_elem); // store it
662 result_element.appendChild(token_elem); // add to the result
663 }
664
665
666 return getMessage(result_doc, result_element);
667 }
668
669 private Vector<String> getCollectionListForSet(String set) {
670 if (set == null) {
671 // no set requested, need the complete collection list
672 return this.collection_name_list;
673 }
674 if (has_super_colls && super_coll_map.containsKey(set)) {
675 return super_coll_map.get(set);
676 }
677
678 Vector<String> coll_list = new Vector<String>();
679 if (set.indexOf(":") != -1) {
680 String col_name = set.substring(0, set.indexOf(":"));
681 coll_list.add(col_name);
682 }
683 else {
684 coll_list.add(set);
685 }
686 return coll_list;
687 }
688 private void addRecordsToList(Document doc, Element result_element, NodeList
689 record_list, int start_point, int num_records) {
690 int end_point = start_point + num_records;
691 for (int i=start_point; i<end_point; i++) {
692 result_element.appendChild(doc.importNode(record_list.item(i), true));
693 }
694 }
695
696
697 // method exclusively used by doListRecords/doListIdentifiers
698 private void getRecords(Element verb_elem, NodeList list, int start_point, int end_point) {
699 for (int i=start_point; i<end_point; i++) {
700 verb_elem.appendChild(verb_elem.getOwnerDocument().importNode(list.item(i), true));
701 }
702 }
703 private Element collectAll(Element result, Element msg, String verb, String elem_name) {
704 if(result == null) {
705 //in the first round, result is null
706 return msg;
707 }
708 Element res_in_result = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
709 if(res_in_result == null) { // return the results of all other collections accumulated so far
710 return msg;
711 }
712 Element verb_elem = (Element)GSXML.getChildByTagName(res_in_result, verb);
713 if(msg == null) {
714 return result;
715 }
716
717 //e.g., get all <record> elements from the returned message. There may be none of
718 //such element, for example, the collection service returned an error message
719 NodeList elem_list = msg.getElementsByTagName(elem_name);
720
721 for (int i=0; i<elem_list.getLength(); i++) {
722 verb_elem.appendChild(res_in_result.getOwnerDocument().importNode(elem_list.item(i), true));
723 }
724 return result;
725 }
726
727
728 /** there are three possible exception conditions: bad argument, idDoesNotExist, and noMetadataFormat.
729 * The first one is handled here, and the last two are processed by OAIPMH.
730 */
731 private Element doListMetadataFormats(Element req) {
732 //if the verb is ListMetadataFormats, there could be only one parameter: identifier
733 //, or there is no parameter; otherwise it is an error
734 //logger.info("" + this.converter.getString(msg));
735
736 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
737 Element param = null;
738 Document lmf_doc = this.converter.newDOM();
739 if(params.getLength() == 0) {
740 //this is requesting metadata formats for the whole repository
741 //read the oaiConfig.xml file, return the metadata formats specified there.
742 if (this.listmetadataformats_response != null) {
743 // we have already created it
744 return this.listmetadataformats_response;
745 }
746
747 Element list_metadata_formats = lmf_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
748
749 Element format_list = (Element)GSXML.getChildByTagName(oai_config, OAIXML.LIST_METADATA_FORMATS);
750 if(format_list == null) {
751 logger.error("OAIConfig.xml must contain the supported metadata formats");
752 // TODO this is internal error, what to do???
753 return getMessage(lmf_doc, list_metadata_formats);
754 }
755 NodeList formats = format_list.getElementsByTagName(OAIXML.METADATA_FORMAT);
756 for(int i=0; i<formats.getLength(); i++) {
757 Element meta_fmt = lmf_doc.createElement(OAIXML.METADATA_FORMAT);
758 Element first_meta_format = (Element)formats.item(i);
759 //the element also contains mappings, but we don't want them
760 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_PREFIX), true));
761 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.SCHEMA), true));
762 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_NAMESPACE), true));
763 list_metadata_formats.appendChild(meta_fmt);
764 }
765 return getMessage(lmf_doc, list_metadata_formats);
766
767
768 }
769
770 if (params.getLength() > 1) {
771 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
772 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
773 }
774
775 // This is a request for the metadata of a particular item with an identifier
776 /**the request xml is in the form: <request>
777 * <param name=.../>
778 * </request>
779 *And there is a param element and one element only. (No paramList element in between).
780 */
781 param = (Element)params.item(0);
782 String param_name = param.getAttribute(GSXML.NAME_ATT);
783 String identifier = "";
784 if (!param_name.equals(OAIXML.IDENTIFIER)) {
785 //Bad argument
786 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
787 }
788
789 identifier = param.getAttribute(GSXML.VALUE_ATT);
790 // the identifier is in the form: <coll_name>:<OID>
791 // so it must contain at least two ':' characters
792 String[] strs = identifier.split(":");
793 if(strs == null || strs.length < 2) {
794 // the OID may also contain ':'
795 logger.error("identifier is not in the form coll:id" + identifier);
796 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
797 }
798
799 // send request to message router
800 // get the names
801 strs = splitNames(identifier);
802 if(strs == null || strs.length < 2) {
803 logger.error("identifier is not in the form coll:id" + identifier);
804 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
805 }
806 //String name_of_site = strs[0];
807 String coll_name = strs[0];
808 String oid = strs[1];
809
810 //re-organize the request element
811 // reset the 'to' attribute
812 String verb = req.getAttribute(GSXML.TO_ATT);
813 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
814 // reset the identifier element
815 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
816 param.setAttribute(GSXML.VALUE_ATT, oid);
817
818 // TODO is this the best way to do this???? should we create a new request???
819 Element message = req.getOwnerDocument().createElement(GSXML.MESSAGE_ELEM);
820 message.appendChild(req);
821 //Now send the request to the message router to process
822 Node result_node = mr.process(message);
823 return converter.nodeToElement(result_node);
824 }
825
826
827
828
829 private void copyNamedElementfromConfig(Element to_elem, String element_name) {
830 Element original_element = (Element)GSXML.getChildByTagName(oai_config, element_name);
831 if(original_element != null) {
832 copyNode(to_elem, original_element);
833 }
834 }
835
836 private void copyNode(Element to_elem, Node original_element) {
837 to_elem.appendChild(to_elem.getOwnerDocument().importNode(original_element, true));
838
839 }
840
841 private Element doIdentify() {
842 //The validation for this verb has been done in OAIServer.validate(). So no bother here.
843 logger.info("");
844 if (this.identify_response != null) {
845 // we have already created it
846 return this.identify_response;
847 }
848 Document doc = this.converter.newDOM();
849 Element identify = doc.createElement(OAIXML.IDENTIFY);
850 //do the repository name
851 copyNamedElementfromConfig(identify, OAIXML.REPOSITORY_NAME);
852 //do the baseurl
853 copyNamedElementfromConfig(identify, OAIXML.BASE_URL);
854 //do the protocol version
855 copyNamedElementfromConfig(identify, OAIXML.PROTOCOL_VERSION);
856
857 //There can be more than one admin email according to the OAI specification
858 NodeList admin_emails = GSXML.getChildrenByTagName(oai_config, OAIXML.ADMIN_EMAIL);
859 int num_admin = 0;
860 Element from_admin_email = null;
861 if (admin_emails != null) {
862 num_admin = admin_emails.getLength();
863 }
864 for (int i=0; i<num_admin; i++) {
865 copyNode(identify, admin_emails.item(i));
866 }
867
868 //do the earliestDatestamp
869 //send request to mr to search through the earliest datestamp amongst all oai collections in the repository.
870 //ask the message router for a list of oai collections
871 //NodeList oai_coll = getOAICollectionList();
872 long earliestDatestamp = getEarliestDateStamp(collection_list);
873 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
874 Element earliestDatestamp_elem = doc.createElement(OAIXML.EARLIEST_DATESTAMP);
875 GSXML.setNodeText(earliestDatestamp_elem, earliestDatestamp_str);
876 identify.appendChild(earliestDatestamp_elem);
877
878 //do the deletedRecord
879 copyNamedElementfromConfig(identify, OAIXML.DELETED_RECORD);
880 //do the granularity
881 copyNamedElementfromConfig(identify, OAIXML.GRANULARITY);
882
883 // output the oai identifier
884 Element description = doc.createElement(OAIXML.DESCRIPTION);
885 identify.appendChild(description);
886 // TODO, make this a valid id
887 Element oaiIdentifier = OAIXML.createOAIIdentifierXML(doc, repository_id, "lucene-jdbm-demo", "ec159e");
888 description.appendChild(oaiIdentifier);
889
890 // if there are any oaiInfo metadata, add them in too.
891 Element info = (Element)GSXML.getChildByTagName(oai_config, OAIXML.OAI_INFO);
892 if (info != null) {
893 NodeList meta = GSXML.getChildrenByTagName(info, OAIXML.METADATA);
894 if (meta != null && meta.getLength() > 0) {
895 Element gsdl = OAIXML.createGSDLElement(doc);
896 description.appendChild(gsdl);
897 for (int m = 0; m<meta.getLength(); m++) {
898 copyNode(gsdl, meta.item(m));
899 }
900
901 }
902 }
903 this.identify_response = identify;
904 return getMessage(doc, identify);
905 }
906 //split setSpec (site_name:coll_name) into an array of strings
907 //It has already been checked that the set_spec contains at least one ':'
908 private String[] splitSetSpec(String set_spec) {
909 logger.info(set_spec);
910 String[] strs = new String[2];
911 int colon_index = set_spec.indexOf(":");
912 strs[0] = set_spec.substring(0, colon_index);
913 strs[1] = set_spec.substring(colon_index + 1);
914 return strs;
915 }
916 /** split the identifier into <collection + OID> as an array
917 It has already been checked that the 'identifier' contains at least one ':'
918 */
919 private String[] splitNames(String identifier) {
920 logger.info(identifier);
921 String [] strs = new String[2];
922 int first_colon = identifier.indexOf(":");
923 if(first_colon == -1) {
924 return null;
925 }
926 strs[0] = identifier.substring(0, first_colon);
927 strs[1] = identifier.substring(first_colon + 1);
928 return strs;
929 }
930 /** validate if the specified metadata prefix value is supported by the repository
931 * by checking it in the OAIConfig.xml
932 */
933 private boolean repositorySupportsMetadataPrefix(String prefix_value) {
934 NodeList prefix_list = oai_config.getElementsByTagName(OAIXML.METADATA_PREFIX);
935
936 for(int i=0; i<prefix_list.getLength(); i++) {
937 if(prefix_value.equals(GSXML.getNodeText((Element)prefix_list.item(i)).trim() )) {
938 return true;
939 }
940 }
941 return false;
942 }
943 private Element doGetRecord(Element req){
944 logger.info("");
945 /** arguments:
946 identifier: required
947 metadataPrefix: required
948 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
949 */
950 Document doc = this.converter.newDOM();
951 Element get_record = doc.createElement(OAIXML.GET_RECORD);
952
953 HashSet<String> valid_strs = new HashSet<String>();
954 valid_strs.add(OAIXML.IDENTIFIER);
955 valid_strs.add(OAIXML.METADATA_PREFIX);
956
957 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
958 HashMap<String, String> param_map = GSXML.getParamMap(params);
959
960 if(!areAllParamsValid(param_map, valid_strs) ||
961 params.getLength() == 0 ||
962 param_map.containsKey(OAIXML.IDENTIFIER) == false ||
963 param_map.containsKey(OAIXML.METADATA_PREFIX) == false ) {
964 logger.error("must have the metadataPrefix/identifier parameter.");
965 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
966 }
967
968 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
969 String identifier = param_map.get(OAIXML.IDENTIFIER);
970
971 // verify the metadata prefix
972 if (repositorySupportsMetadataPrefix(prefix) == false) {
973 logger.error("requested prefix is not found in OAIConfig.xml");
974 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
975 }
976
977 // get the names
978 String[] strs = splitNames(identifier);
979 if(strs == null || strs.length < 2) {
980 logger.error("identifier is not in the form coll:id" + identifier);
981 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
982 }
983 //String name_of_site = strs[0];
984 String coll_name = strs[0];
985 String oid = strs[1];
986
987 //re-organize the request element
988 // reset the 'to' attribute
989 String verb = req.getAttribute(GSXML.TO_ATT);
990 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
991 // reset the identifier element
992 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.IDENTIFIER);
993 if (param != null) {
994 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
995 param.setAttribute(GSXML.VALUE_ATT, oid);
996 }
997
998 //Now send the request to the message router to process
999 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
1000 msg.appendChild(doc.importNode(req, true));
1001 Node result_node = mr.process(msg);
1002 return converter.nodeToElement(result_node);
1003 }
1004
1005 // See OAIConfig.xml
1006 // dynamically works out what the earliestDateStamp is, since it varies by collection
1007 // returns this time in *milliseconds*.
1008 protected long getEarliestDateStamp(Element oai_coll_list) {
1009 // config earliest datstamp
1010 long config_datestamp = 0;
1011 Element config_datestamp_elem = (Element)GSXML.getChildByTagName(this.oai_config, OAIXML.EARLIEST_DATESTAMP);
1012 if (config_datestamp_elem != null) {
1013 String datest = GSXML.getNodeText(config_datestamp_elem);
1014 config_datestamp = OAIXML.getTime(datest);
1015 if (config_datestamp == -1) {
1016 config_datestamp = 0;
1017 }
1018 }
1019 //do the earliestDatestamp
1020 long current_time = System.currentTimeMillis();
1021 long earliestDatestamp = current_time;
1022 NodeList oai_coll = oai_coll_list.getElementsByTagName(GSXML.COLLECTION_ELEM);
1023 int oai_coll_size = oai_coll.getLength();
1024 if (oai_coll_size == 0) {
1025 logger.info("returned oai collection list is empty. Setting repository earliestDatestamp to be the earliest datestamp from OAIConfig.xml, or 1970-01-01 if not specified.");
1026 return config_datestamp;
1027 }
1028 // the earliestDatestamp is now stored as a metadata element in the collection's buildConfig.xml file
1029 // we get the earliestDatestamp among the collections
1030 for(int i=0; i<oai_coll_size; i++) {
1031 long coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.EARLIEST_DATESTAMP));
1032 if (coll_earliestDatestamp == 0) {
1033 // try last modified
1034 coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.LAST_MODIFIED));
1035 }
1036 if (coll_earliestDatestamp > 0) {
1037 earliestDatestamp = (earliestDatestamp > coll_earliestDatestamp)? coll_earliestDatestamp : earliestDatestamp;
1038 }
1039 }
1040 if (earliestDatestamp == current_time) {
1041 logger.info("no collection had a real datestamp, using value from OAIConfig");
1042 return config_datestamp;
1043 }
1044 return earliestDatestamp;
1045 }
1046
1047 private boolean collectionsChangedSinceTime(String set_spec_str, long initial_time) {
1048
1049 // we need to look though all collections in the set to see if any have last modified dates > initial_time
1050 Vector<String> set_coll_list = getCollectionListForSet(set_spec_str);
1051
1052 Node child = this.collection_list.getFirstChild();
1053 while (child != null) {
1054 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
1055 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
1056 if (set_coll_list.contains(coll_id)) {
1057 long last_modified = Long.parseLong(((Element)child).getAttribute(OAIXML.LAST_MODIFIED));
1058 if (initial_time < last_modified) {
1059 return true;
1060 }
1061 }
1062 }
1063 child = child.getNextSibling();
1064 }
1065 return false;
1066
1067 }
1068
1069}
1070
1071
Note: See TracBrowser for help on using the repository browser.