source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/core/OAIReceptionist.java@ 28966

Last change on this file since 28966 was 28966, checked in by kjdon, 10 years ago

Lots of changes, mainly to do with removing this.doc from everywhere, because Document is not thread safe. Now we tend to create a new Document every time we start a new page/message etc. In services, this.desc_doc is available as the document used to create service info, but it should only be used for that and not for other messages. newDOM is now static in XMLConverter. Method parameters have changed for some GSXML methods.

File size: 43.2 KB
Line 
1/*
2 * OAIReceptionist.java
3 * Copyright (C) 2012 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20package org.greenstone.gsdl3.core;
21
22import org.greenstone.gsdl3.util.*;
23import org.greenstone.gsdl3.action.*;
24// XML classes
25import org.w3c.dom.Node;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29
30// other java classes
31import java.io.File;
32import java.util.*;
33
34import org.apache.log4j.*;
35
36/** a Receptionist, used for oai metadata response xml generation.
37 * This receptionist talks to the message router directly,
38 * instead of via any action, hence no action map is needed.
39 * @see the basic Receptionist
40 */
41public class OAIReceptionist implements ModuleInterface {
42
43 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.core.OAIReceptionist.class.getName());
44
45 /** Instead of a config_params object, only a site_name is needed by oai receptionist. */
46 protected String site_name = null;
47 /** The unique repository identifier */
48 protected String repository_id = null;
49
50 /** a converter class to parse XML and create Docs */
51 protected XMLConverter converter=null;
52
53 /** the configure file of this receptionist passed from the oai servlet. */
54 protected Element oai_config = null;
55
56 /** contained in the OAIConfig.xml deciding whether the resumptionToken should be in use */
57 protected int resume_after = -1 ;
58
59 /** the message router that the Receptionist and Actions will talk to */
60 protected ModuleInterface mr = null;
61
62 // Some of the data/responses will not change while the servlet is running, so
63 // we can cache them
64
65 /** A list of all the collections available to this OAI server */
66 protected Element collection_list = null;
67 /** a vector of the names, for convenience */
68 protected Vector<String> collection_name_list = null;
69 /** If this is true, then there are no OAI enabled collections, so can always return noRecordsMatch (after validating the request params) */
70 protected boolean noRecordsMatch = false;
71
72 /** A set of all known 'sets' */
73 protected HashSet<String> set_set = null;
74
75 protected boolean has_super_colls = false;
76 /** a hash of super set-> collection list */
77 protected HashMap<String, Vector<String>> super_coll_map = null;
78 /** store the super coll elements for convenience */
79 HashMap<String, Element> super_coll_data = null;
80 /** The identify response */
81 protected Element identify_response = null;
82 /** The list set response */
83 protected Element listsets_response = null;
84 /** the list metadata formats response */
85 protected Element listmetadataformats_response = null;
86
/**
 * Creates a receptionist that owns a fresh XMLConverter. The message router
 * and the configuration are supplied afterwards through setMessageRouter()
 * and configure().
 */
public OAIReceptionist() {
    converter = new XMLConverter();
}
90
91 public void cleanUp() {
92 if (this.mr != null) {
93
94 this.mr.cleanUp();
95 }
96 OAIResumptionToken.saveTokensToFile();
97 }
98
99 public void setSiteName(String site_name) {
100 this.site_name = site_name;
101 }
102 /** sets the message router - it should already be created and
103 * configured in the init() of a servlet (OAIServer, for example) before being passed to the receptionist*/
104 public void setMessageRouter(ModuleInterface mr) {
105 this.mr = mr;
106 }
107
108 /** configures the receptionist */
109 public boolean configure(Element config) {
110
111 if (this.mr==null) {
112 logger.error(" message routers must be set before calling oai configure");
113 return false;
114 }
115 if (config == null) {
116 logger.error(" oai configure file is null");
117 return false;
118 }
119 oai_config = config;
120 resume_after = getResumeAfter();
121
122 repository_id = getRepositoryIdentifier();
123 configureSuperSetInfo();
124 if (!configureSetInfo()) {
125 // there are no sets
126 logger.error("No sets (collections) available for OAI");
127 return false;
128 }
129
130 //clear out expired resumption tokens stored in OAIResumptionToken.xml
131 OAIResumptionToken.init();
132 OAIResumptionToken.clearExpiredTokens();
133
134 return true;
135 }
136
137 // assuming that sets are static. If collections change then the servlet
138 // should be restarted.
139 private boolean configureSuperSetInfo() {
140 // do we have any super colls listed in web/WEB-INF/classes/OAIConfig.xml?
141 // Will be like
142 // <oaiSuperSet>
143 // <SetSpec>xxx</SetSpec>
144 // <setName>xxx</SetName>
145 // <SetDescription>xxx</setDescription>
146 // </oaiSuperSet>
147 // The super set is listed in OAIConfig, and collections themselves state
148 // whether they are part of the super set or not.
149 NodeList super_coll_list = this.oai_config.getElementsByTagName(OAIXML.OAI_SUPER_SET);
150 this.super_coll_data = new HashMap<String, Element>();
151 if (super_coll_list.getLength() > 0) {
152 this.has_super_colls = true;
153 for (int i=0; i<super_coll_list.getLength(); i++) {
154 Element super_coll = (Element)super_coll_list.item(i);
155 Element set_spec = (Element)GSXML.getChildByTagName(super_coll, OAIXML.SET_SPEC);
156 if (set_spec != null) {
157 String name = GSXML.getNodeText(set_spec);
158 if (!name.equals("")) {
159 this.super_coll_data.put(name, super_coll);
160 logger.error("adding in super coll "+name);
161 }
162 }
163 }
164
165 if (this.super_coll_data.size()==0) {
166 this.has_super_colls = false;
167 }
168 }
169 if (this.has_super_colls == true) {
170 this.super_coll_map = new HashMap<String, Vector<String>>();
171 }
172 return true;
173
174 }
175 private boolean configureSetInfo() {
176 this.set_set = new HashSet<String>();
177
178 // First, we get a list of all the OAI enabled collections
179 // We get this by sending a listSets request to the MR
180 Document doc = XMLConverter.newDOM();
181 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
182
183 Element request = GSXML.createBasicRequest(doc, OAIXML.OAI_SET_LIST, "", null);
184 message.appendChild(request);
185 Node msg_node = mr.process(message);
186
187 if (msg_node == null) {
188 logger.error("returned msg_node from mr is null");
189 return false;
190 }
191 Element resp = (Element)GSXML.getChildByTagName(msg_node, GSXML.RESPONSE_ELEM);
192 Element coll_list = (Element)GSXML.getChildByTagName(resp, GSXML.COLLECTION_ELEM + GSXML.LIST_MODIFIER);
193 if (coll_list == null) {
194 logger.error("coll_list is null");
195 return false;
196 }
197
198 this.collection_list = (Element)doc.importNode(coll_list, true);
199
200 // go through and store a list of collection names for convenience
201 // also create a 'to' attribute
202 Node child = this.collection_list.getFirstChild();
203 if (child == null) {
204 logger.error("collection list has no children");
205 noRecordsMatch = true;
206 return false;
207 }
208
209 this.collection_name_list = new Vector<String>();
210 StringBuffer to = new StringBuffer();
211 boolean first = true;
212 while (child != null) {
213 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
214 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
215 this.collection_name_list.add(coll_id);
216 if (!first) {
217 to.append(',');
218 }
219 first = false;
220 to.append(coll_id+"/"+OAIXML.LIST_SETS);
221 }
222 child = child.getNextSibling();
223 }
224 if (first) {
225 // we haven't found any collections
226 logger.error("found no collection elements in collectionList");
227 noRecordsMatch = true;
228 return false;
229 }
230 Document listsets_doc = XMLConverter.newDOM();
231 Element listsets_element = listsets_doc.createElement(OAIXML.LIST_SETS);
232 this.listsets_response = getMessage(listsets_doc, listsets_element);
233
234 // Now, for each collection, get a list of all its sets
235 // might include subsets (classifiers) or super colls
236 // We'll reuse the first message, changing its type and to atts
237 request.setAttribute(GSXML.TYPE_ATT, "");
238 request.setAttribute(GSXML.TO_ATT, to.toString());
239 // send to MR
240 msg_node = mr.process(message);
241 logger.error(this.converter.getPrettyString(msg_node));
242 NodeList response_list = ((Element)msg_node).getElementsByTagName(GSXML.RESPONSE_ELEM);
243 for (int c=0; c<response_list.getLength(); c++) {
244 // for each collection's response
245 Element response = (Element)response_list.item(c);
246 String coll_name = GSPath.getFirstLink(response.getAttribute(GSXML.FROM_ATT));
247 logger.error("coll from response "+coll_name);
248 NodeList set_list = response.getElementsByTagName(OAIXML.SET);
249 for (int j=0; j<set_list.getLength(); j++) {
250 // now check if it a super collection
251 Element set = (Element)set_list.item(j);
252 String set_spec = GSXML.getNodeText((Element)GSXML.getChildByTagName(set, OAIXML.SET_SPEC));
253 logger.error("set spec = "+set_spec);
254 // this may change if we add site name back in
255 // setSpecs will be collname or collname:subset or supercollname
256 if (set_spec.indexOf(":")==-1 && ! set_spec.equals(coll_name)) {
257 // it must be a super coll spec
258 logger.error("found super coll, "+set_spec);
259 // check that it is a valid one from config
260 if (this.has_super_colls == true && this.super_coll_data.containsKey(set_spec)) {
261 Vector <String> subcolls = this.super_coll_map.get(set_spec);
262 if (subcolls == null) {
263 logger.error("its new!!");
264 // not in there yet
265 subcolls = new Vector<String>();
266 this.set_set.add(set_spec);
267 this.super_coll_map.put(set_spec, subcolls);
268 // the first time a supercoll is mentioned, add into the set list
269 logger.error("finding the set info "+this.converter.getPrettyString(this.super_coll_data.get(set_spec)));
270 listsets_element.appendChild(GSXML.duplicateWithNewName(listsets_doc, this.super_coll_data.get(set_spec), OAIXML.SET, true));
271 }
272 // add this collection to the list for the super coll
273 subcolls.add(coll_name);
274 }
275 } else { // its either the coll itself or a subcoll
276 // add in the set
277 listsets_element.appendChild(listsets_doc.importNode(set, true));
278 this.set_set.add(set_spec);
279 }
280 } // for each set in the collection
281 } // for each OAI enabled collection
282 return true;
283 }
284
285 protected void resetMessageRouter() {
286 // we just need to send a configure request to MR
287 Document doc = XMLConverter.newDOM();
288 Element mr_request_message = doc.createElement(GSXML.MESSAGE_ELEM);
289 Element mr_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_SYSTEM, "", null);
290 mr_request_message.appendChild(mr_request);
291
292 Element system = doc.createElement(GSXML.SYSTEM_ELEM);
293 mr_request.appendChild(system);
294 system.setAttribute(GSXML.TYPE_ATT, GSXML.SYSTEM_TYPE_CONFIGURE);
295
296 Element response = (Element) this.mr.process(mr_request_message);
297 logger.error("configure response = "+this.converter.getPrettyString(response));
298 }
299 /** process using strings - just calls process using Elements */
300 public String process(String xml_in) {
301
302 Node message_node = this.converter.getDOM(xml_in);
303 Node page = process(message_node);
304 return this.converter.getString(page);
305 }
306
307 //Compose a message/response element used to send back to the OAIServer servlet.
308 //This method is only used within OAIReceptionist
309 private Element getMessage(Document doc, Element e) {
310 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
311 Element response = doc.createElement(GSXML.RESPONSE_ELEM);
312 msg.appendChild(response);
313 response.appendChild(e);
314 return msg;
315 }
316
317 /** process - produce xml data in response to a request
318 * if something goes wrong, it returns null -
319 */
320 public Node process(Node message_node) {
321 logger.error("OAIReceptionist received request");
322
323 Element message = GSXML.nodeToElement(message_node);
324 logger.error(this.converter.getString(message));
325
326 // check that its a correct message tag
327 if (!message.getTagName().equals(GSXML.MESSAGE_ELEM)) {
328 logger.error(" Invalid message. GSDL message should start with <"+GSXML.MESSAGE_ELEM+">, instead it starts with:"+message.getTagName()+".");
329 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
330 }
331
332 // get the request out of the message - assume that there is only one
333 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
334 if (request == null) {
335 logger.error(" message had no request!");
336 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
337 }
338
339 // special case, reset=true for reloading the MR and recept data
340 String reset = request.getAttribute("reset");
341 if (!reset.equals("")) {
342 resetMessageRouter();
343 configureSetInfo();
344 return OAIXML.createResetResponse(true);
345 }
346
347
348 //At this stage, the value of 'to' attribute of the request must be the 'verb'
349 //The only thing that the oai receptionist can be sure is that these verbs are valid, nothing else.
350 String verb = request.getAttribute(GSXML.TO_ATT);
351 if (verb.equals(OAIXML.IDENTIFY)) {
352 return doIdentify();
353 }
354 if (verb.equals(OAIXML.LIST_METADATA_FORMATS)) {
355 return doListMetadataFormats(request);
356 }
357 if (verb.equals(OAIXML.LIST_SETS)) {
358 // we have composed the list sets response on init
359 // Note this means that list sets never uses resumption tokens
360 return this.listsets_response;
361 }
362 if (verb.equals(OAIXML.GET_RECORD)) {
363 return doGetRecord(request);
364 }
365 if (verb.equals(OAIXML.LIST_IDENTIFIERS)) {
366 return doListIdentifiersOrRecords(request,OAIXML.LIST_IDENTIFIERS , OAIXML.HEADER);
367 }
368 if (verb.equals(OAIXML.LIST_RECORDS)) {
369 return doListIdentifiersOrRecords(request, OAIXML.LIST_RECORDS, OAIXML.RECORD);
370 }
371 // should never get here as verbs were checked in OAIServer
372 return OAIXML.createErrorMessage(OAIXML.BAD_VERB, "Unexpected things happened");
373
374 }
375
376
377 private int getResumeAfter() {
378 Element resume_after = (Element)GSXML.getChildByTagName(oai_config, OAIXML.RESUME_AFTER);
379 if(resume_after != null) return Integer.parseInt(GSXML.getNodeText(resume_after));
380 return -1;
381 }
382 private String getRepositoryIdentifier() {
383 Element ri = (Element)GSXML.getChildByTagName(oai_config, OAIXML.REPOSITORY_IDENTIFIER);
384 if (ri != null) {
385 return GSXML.getNodeText(ri);
386 }
387 return "";
388 }
389
390
391 /** if the param_map contains strings other than those in valid_strs, return false;
392 * otherwise true.
393 */
394 private boolean areAllParamsValid(HashMap<String, String> param_map, HashSet<String> valid_strs) {
395 ArrayList<String> param_list = new ArrayList<String>(param_map.keySet());
396 for(int i=0; i<param_list.size(); i++) {
397 logger.error("param, key = "+param_list.get(i)+", value = "+param_map.get(param_list.get(i)));
398 if (valid_strs.contains(param_list.get(i)) == false) {
399 return false;
400 }
401 }
402 return true;
403 }
404
405 private Element doListIdentifiersOrRecords(Element req, String verb, String record_type) {
406 // options: from, until, set, metadataPrefix, resumptionToken
407 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
408 HashSet<String> valid_strs = new HashSet<String>();
409 valid_strs.add(OAIXML.FROM);
410 valid_strs.add(OAIXML.UNTIL);
411 valid_strs.add(OAIXML.SET);
412 valid_strs.add(OAIXML.METADATA_PREFIX);
413 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
414
415 Document result_doc = XMLConverter.newDOM();
416 Element result_element = result_doc.createElement(verb);
417 boolean result_token_needed = false; // does this result need to include a
418 // resumption token
419
420 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
421
422 HashMap<String, String> param_map = GSXML.getParamMap(params);
423
424 // are all the params valid?
425 if (!areAllParamsValid(param_map, valid_strs)) {
426 logger.error("One of the params is invalid");
427 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "There was an invalid parameter");
428 // TODO, need to tell the user which one was invalid ??
429 }
430
431 // Do we have a resumption token??
432 String token = null;
433 String from = null;
434 String until = null;
435 boolean set_requested = false;
436 String set_spec_str = null;
437 String prefix_value = null;
438 int cursor = 0;
439 int current_cursor = 0;
440 String current_set = null;
441 long initial_time = 0;
442
443 int total_size = -1; // we are only going to set this in resumption
444 // token if it is easy to work out, i.e. not sending extra requests to
445 // MR just to calculate total size
446
447 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
448 // Is it an error to have other arguments? Do we need to check to make sure that resumptionToken is the only arg??
449 // validate resumptionToken
450 token = param_map.get(OAIXML.RESUMPTION_TOKEN);
451 logger.info("has resumptionToken " + token);
452 if(OAIResumptionToken.isValidToken(token) == false) {
453 logger.error("token is not valid");
454 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "");
455 }
456 result_token_needed = true; // we always need to send a token back if we have started with one. It may be empty if we are returning the end of the list
457 // initialise the request params from the stored token data
458 HashMap<String, String> token_data = OAIResumptionToken.getTokenData(token);
459 from = token_data.get(OAIXML.FROM);
460 until = token_data.get(OAIXML.UNTIL);
461 set_spec_str = token_data.get(OAIXML.SET);
462 if (set_spec_str != null) {
463 set_requested = true;
464 }
465 prefix_value = token_data.get(OAIXML.METADATA_PREFIX);
466 current_set = token_data.get(OAIResumptionToken.CURRENT_SET);
467 try {
468 cursor = Integer.parseInt(token_data.get(OAIXML.CURSOR));
469 cursor = cursor + resume_after; // increment cursor
470 current_cursor = Integer.parseInt(token_data.get(OAIResumptionToken.CURRENT_CURSOR));
471 initial_time = Long.parseLong(token_data.get(OAIResumptionToken.INITIAL_TIME));
472 } catch (NumberFormatException e) {
473 logger.error("tried to parse int from cursor data and failed");
474 }
475
476 // check that the collections/sets haven't changed since the token was issued
477 if (collectionsChangedSinceTime(set_spec_str, initial_time)) {
478 logger.error("one of the collections in set "+set_spec_str+" has changed since token issued. Expiring the token");
479 OAIResumptionToken.expireToken(token);
480 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "Repository data has changed since this token was issued. Resend original request");
481 }
482 }
483 else {
484 // no resumption token, lets check the other params
485 // there must be a metadataPrefix
486 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
487 logger.error("metadataPrefix param required");
488 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "metadataPrefix param required");
489 }
490
491 //if there are any date params, check they're of the right format
492 from = param_map.get(OAIXML.FROM);
493 if(from != null) {
494 Date from_date = OAIXML.getDate(from);
495 if(from_date == null) {
496 logger.error("invalid date: " + from);
497 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.FROM);
498 }
499 }
500 until = param_map.get(OAIXML.UNTIL);
501 if(until != null) {
502 Date until_date = OAIXML.getDate(until);
503 if(until_date == null) {
504 logger.error("invalid date: " + until);
505 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.UNTIL);
506 }
507 }
508 if(from != null && until != null) { // check they are of the same date-time format (granularity)
509 if(from.length() != until.length()) {
510 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
511 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "The request has different granularities (date-time formats) for the From and Until date parameters.");
512 }
513 }
514
515 // check the set arg is a set we know about
516 set_requested = param_map.containsKey(OAIXML.SET);
517 set_spec_str = null;
518 if(set_requested == true) {
519 set_spec_str = param_map.get(OAIXML.SET);
520 if (!this.set_set.contains(set_spec_str)) {
521 // the set is not one we know about
522 logger.error("requested set is not found in this repository");
523 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid set parameter");
524
525 }
526 }
527 // Is the metadataPrefix arg one this repository supports?
528 prefix_value = param_map.get(OAIXML.METADATA_PREFIX);
529 if (repositorySupportsMetadataPrefix(prefix_value) == false) {
530 logger.error("requested metadataPrefix is not found in OAIConfig.xml");
531 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "metadata format "+prefix_value+" not supported by this repository");
532 }
533
534 } // else no resumption token, check other params
535
536 // Whew. Now we have validated the params, we can work on doing the actual
537 // request
538
539
540 Document doc = XMLConverter.newDOM();
541 Element mr_msg = doc.createElement(GSXML.MESSAGE_ELEM);
542 Element mr_req = doc.createElement(GSXML.REQUEST_ELEM);
543 // TODO does this need a type???
544 mr_msg.appendChild(mr_req);
545
546 // copy in the from/until params if there
547 if (from != null) {
548 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.FROM, from));
549 }
550 if (until != null) {
551 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.UNTIL, until));
552 }
553 // add metadataPrefix
554 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.METADATA_PREFIX, prefix_value));
555
556 // do we have a set???
557 // if no set, we send to all collections in the collection list
558 // if super set, we send to all collections in super set list
559 // if a single collection, send to it
560 // if a subset, send to the collection
561 Vector<String> current_coll_list = getCollectionListForSet(set_spec_str);
562 boolean single_collection = false;
563 if (current_coll_list.size() == 1) {
564 single_collection = true;
565 }
566 if (set_spec_str != null && set_spec_str.indexOf(":") != -1) {
567 // we have a subset - add the set param back in
568 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.SET, set_spec_str));
569 }
570
571 int num_collected_records = 0;
572 int start_point = current_cursor; // may not be 0 if we are using a resumption token
573 String resumption_collection = "";
574 boolean empty_result_token = false; // if we are sending the last part of a list, then the token value will be empty
575
576 // iterate through the list of collections and send the request to each
577
578 int start_coll=0;
579 if (current_set != null) {
580 // we are resuming a previous request, need to locate the first collection
581 for (int i=0; i<current_coll_list.size(); i++) {
582 if (current_set.equals(current_coll_list.get(i))) {
583 start_coll = i;
584 break;
585 }
586 }
587 }
588
589 for (int i=start_coll; i<current_coll_list.size(); i++) {
590 String current_coll = current_coll_list.get(i);
591 mr_req.setAttribute(GSXML.TO_ATT, current_coll+"/"+verb);
592
593 Element result = (Element)mr.process(mr_msg);
594 logger.error(verb+ " result for coll "+current_coll);
595 logger.error(this.converter.getPrettyString(result));
596 if (result == null) {
597 logger.info("message router returns null");
598 // do what??? carry on? fail??
599 return OAIXML.createErrorMessage("Internal service returns null", "");
600 }
601 Element res = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
602 if(res == null) {
603 logger.info("response element in xml_result is null");
604 return OAIXML.createErrorMessage("Internal service returns null", "");
605 }
606 NodeList record_list = res.getElementsByTagName(record_type);
607 int num_records = record_list.getLength();
608 if(num_records == 0) {
609 logger.info("message router returns 0 records for coll "+current_coll);
610 continue; // try the next collection
611 }
612 if (single_collection) {
613 total_size = num_records;
614 }
615 int records_to_add = (resume_after > 0 ? resume_after - num_collected_records : num_records);
616 if (records_to_add > (num_records-start_point)) {
617 records_to_add = num_records-start_point;
618 }
619 addRecordsToList(result_doc, result_element, record_list, start_point, records_to_add);
620 num_collected_records += records_to_add;
621
622 // do we need to stop here, and do we need to issue a resumption token?
623 if (resume_after > 0 && num_collected_records == resume_after) {
624 // we have finished collecting records at the moment.
625 // but are we conincidentally at the end? or are there more to go?
626 if (records_to_add < (num_records - start_point)) {
627 // we have added less than this collection had
628 start_point += records_to_add;
629 resumption_collection = current_coll;
630 result_token_needed = true;
631 }
632 else {
633 // we added all this collection had to offer
634 // is there another collection in the list??
635 if (i<current_coll_list.size()-1) {
636 result_token_needed = true;
637 start_point = 0;
638 resumption_collection = current_coll_list.get(i+1);
639 }
640 else {
641 // we have finished one collection and there are no more collection
642 // if we need to send a resumption token (in this case, only because we started with one, then it will be empty
643 logger.error("at end of list, need empty result token");
644 empty_result_token = true;
645 }
646 }
647 break;
648 }
649 start_point = 0; // only the first one will have start non-zero, if we
650 // have a resumption token
651
652 } // for each collection
653
654 if (num_collected_records ==0) {
655 // there were no matching results
656 return OAIXML.createErrorMessage(OAIXML.NO_RECORDS_MATCH, "");
657 }
658
659 if (num_collected_records < resume_after) {
660 // we have been through all collections, and there are no more
661 // if we need a result token - only because we started with one, so we need to send an empty one, then make sure everyone knows we are just sending an empty one
662 if (result_token_needed) {
663 empty_result_token = true;
664 }
665 }
666
667 if (result_token_needed) {
668 // we need a resumption token
669 if (empty_result_token) {
670 logger.error("have empty result token");
671 token = "";
672 } else {
673 if (token != null) {
674 // we had a token for this request, we can just update it
675 token = OAIResumptionToken.updateToken(token, ""+cursor, resumption_collection, ""+start_point);
676 } else {
677 // we are generating a new one
678 token = OAIResumptionToken.createAndStoreResumptionToken(set_spec_str, prefix_value, from, until, ""+cursor, resumption_collection, ""+start_point );
679 }
680 }
681
682 // result token XML
683 long expiration_date = -1;
684 if (empty_result_token) {
685 // we know how many records in total as we have sent them all
686 total_size = cursor+num_collected_records;
687 } else {
688 // non-empty token, set the expiration date
689 expiration_date = OAIResumptionToken.getExpirationDate(token);
690 }
691 Element token_elem = OAIXML.createResumptionTokenElement(result_doc, token, total_size, cursor, expiration_date);
692 // OAIXML.addToken(token_elem); // store it
693 result_element.appendChild(token_elem); // add to the result
694 }
695
696
697 return getMessage(result_doc, result_element);
698 }
699
700 private Vector<String> getCollectionListForSet(String set) {
701 if (set == null) {
702 // no set requested, need the complete collection list
703 return this.collection_name_list;
704 }
705 if (has_super_colls && super_coll_map.containsKey(set)) {
706 return super_coll_map.get(set);
707 }
708
709 Vector<String> coll_list = new Vector<String>();
710 if (set.indexOf(":") != -1) {
711 String col_name = set.substring(0, set.indexOf(":"));
712 coll_list.add(col_name);
713 }
714 else {
715 coll_list.add(set);
716 }
717 return coll_list;
718 }
719 private void addRecordsToList(Document doc, Element result_element, NodeList
720 record_list, int start_point, int num_records) {
721 int end_point = start_point + num_records;
722 for (int i=start_point; i<end_point; i++) {
723 result_element.appendChild(doc.importNode(record_list.item(i), true));
724 }
725 }
726
727
728 // method exclusively used by doListRecords/doListIdentifiers
729 private void getRecords(Element verb_elem, NodeList list, int start_point, int end_point) {
730 for (int i=start_point; i<end_point; i++) {
731 verb_elem.appendChild(verb_elem.getOwnerDocument().importNode(list.item(i), true));
732 }
733 }
734 private Element collectAll(Element result, Element msg, String verb, String elem_name) {
735 if(result == null) {
736 //in the first round, result is null
737 return msg;
738 }
739 Element res_in_result = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
740 if(res_in_result == null) { // return the results of all other collections accumulated so far
741 return msg;
742 }
743 Element verb_elem = (Element)GSXML.getChildByTagName(res_in_result, verb);
744 if(msg == null) {
745 return result;
746 }
747
748 //e.g., get all <record> elements from the returned message. There may be none of
749 //such element, for example, the collection service returned an error message
750 NodeList elem_list = msg.getElementsByTagName(elem_name);
751
752 for (int i=0; i<elem_list.getLength(); i++) {
753 verb_elem.appendChild(res_in_result.getOwnerDocument().importNode(elem_list.item(i), true));
754 }
755 return result;
756 }
757
758
759 /** there are three possible exception conditions: bad argument, idDoesNotExist, and noMetadataFormat.
760 * The first one is handled here, and the last two are processed by OAIPMH.
761 */
762 private Element doListMetadataFormats(Element req) {
763 //if the verb is ListMetadataFormats, there could be only one parameter: identifier
764 //, or there is no parameter; otherwise it is an error
765 //logger.info("" + this.converter.getString(msg));
766
767 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
768 Element param = null;
769 Document lmf_doc = XMLConverter.newDOM();
770 if(params.getLength() == 0) {
771 //this is requesting metadata formats for the whole repository
772 //read the oaiConfig.xml file, return the metadata formats specified there.
773 if (this.listmetadataformats_response != null) {
774 // we have already created it
775 return this.listmetadataformats_response;
776 }
777
778 Element list_metadata_formats = lmf_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
779
780 Element format_list = (Element)GSXML.getChildByTagName(oai_config, OAIXML.LIST_METADATA_FORMATS);
781 if(format_list == null) {
782 logger.error("OAIConfig.xml must contain the supported metadata formats");
783 // TODO this is internal error, what to do???
784 return getMessage(lmf_doc, list_metadata_formats);
785 }
786 NodeList formats = format_list.getElementsByTagName(OAIXML.METADATA_FORMAT);
787 for(int i=0; i<formats.getLength(); i++) {
788 Element meta_fmt = lmf_doc.createElement(OAIXML.METADATA_FORMAT);
789 Element first_meta_format = (Element)formats.item(i);
790 //the element also contains mappings, but we don't want them
791 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_PREFIX), true));
792 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.SCHEMA), true));
793 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_NAMESPACE), true));
794 list_metadata_formats.appendChild(meta_fmt);
795 }
796 return getMessage(lmf_doc, list_metadata_formats);
797
798
799 }
800
801 if (params.getLength() > 1) {
802 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
803 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
804 }
805
806 // This is a request for the metadata of a particular item with an identifier
807 /**the request xml is in the form: <request>
808 * <param name=.../>
809 * </request>
810 *And there is a param element and one element only. (No paramList element in between).
811 */
812 param = (Element)params.item(0);
813 String param_name = param.getAttribute(GSXML.NAME_ATT);
814 String identifier = "";
815 if (!param_name.equals(OAIXML.IDENTIFIER)) {
816 //Bad argument
817 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
818 }
819
820 identifier = param.getAttribute(GSXML.VALUE_ATT);
821 // the identifier is in the form: <coll_name>:<OID>
822 // so it must contain at least two ':' characters
823 String[] strs = identifier.split(":");
824 if(strs == null || strs.length < 2) {
825 // the OID may also contain ':'
826 logger.error("identifier is not in the form coll:id" + identifier);
827 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
828 }
829
830 // send request to message router
831 // get the names
832 strs = splitNames(identifier);
833 if(strs == null || strs.length < 2) {
834 logger.error("identifier is not in the form coll:id" + identifier);
835 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
836 }
837 //String name_of_site = strs[0];
838 String coll_name = strs[0];
839 String oid = strs[1];
840
841 //re-organize the request element
842 // reset the 'to' attribute
843 String verb = req.getAttribute(GSXML.TO_ATT);
844 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
845 // reset the identifier element
846 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
847 param.setAttribute(GSXML.VALUE_ATT, oid);
848
849 // TODO is this the best way to do this???? should we create a new request???
850 Element message = req.getOwnerDocument().createElement(GSXML.MESSAGE_ELEM);
851 message.appendChild(req);
852 //Now send the request to the message router to process
853 Node result_node = mr.process(message);
854 return GSXML.nodeToElement(result_node);
855 }
856
857
858
859
860 private void copyNamedElementfromConfig(Element to_elem, String element_name) {
861 Element original_element = (Element)GSXML.getChildByTagName(oai_config, element_name);
862 if(original_element != null) {
863 copyNode(to_elem, original_element);
864 }
865 }
866
867 private void copyNode(Element to_elem, Node original_element) {
868 to_elem.appendChild(to_elem.getOwnerDocument().importNode(original_element, true));
869
870 }
871
872 private Element doIdentify() {
873 //The validation for this verb has been done in OAIServer.validate(). So no bother here.
874 logger.info("");
875 if (this.identify_response != null) {
876 // we have already created it
877 return getMessage(this.identify_response.getOwnerDocument(), this.identify_response);
878 }
879 Document doc = XMLConverter.newDOM();
880 Element identify = doc.createElement(OAIXML.IDENTIFY);
881 //do the repository name
882 copyNamedElementfromConfig(identify, OAIXML.REPOSITORY_NAME);
883 //do the baseurl
884 copyNamedElementfromConfig(identify, OAIXML.BASE_URL);
885 //do the protocol version
886 copyNamedElementfromConfig(identify, OAIXML.PROTOCOL_VERSION);
887
888 //There can be more than one admin email according to the OAI specification
889 NodeList admin_emails = GSXML.getChildrenByTagName(oai_config, OAIXML.ADMIN_EMAIL);
890 int num_admin = 0;
891 Element from_admin_email = null;
892 if (admin_emails != null) {
893 num_admin = admin_emails.getLength();
894 }
895 for (int i=0; i<num_admin; i++) {
896 copyNode(identify, admin_emails.item(i));
897 }
898
899 //do the earliestDatestamp
900 //send request to mr to search through the earliest datestamp amongst all oai collections in the repository.
901 //ask the message router for a list of oai collections
902 //NodeList oai_coll = getOAICollectionList();
903 long earliestDatestamp = getEarliestDateStamp(collection_list);
904 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
905 Element earliestDatestamp_elem = doc.createElement(OAIXML.EARLIEST_DATESTAMP);
906 GSXML.setNodeText(earliestDatestamp_elem, earliestDatestamp_str);
907 identify.appendChild(earliestDatestamp_elem);
908
909 //do the deletedRecord
910 copyNamedElementfromConfig(identify, OAIXML.DELETED_RECORD);
911 //do the granularity
912 copyNamedElementfromConfig(identify, OAIXML.GRANULARITY);
913
914 // output the oai identifier
915 Element description = doc.createElement(OAIXML.DESCRIPTION);
916 identify.appendChild(description);
917 // TODO, make this a valid id
918 Element oaiIdentifier = OAIXML.createOAIIdentifierXML(doc, repository_id, "lucene-jdbm-demo", "ec159e");
919 description.appendChild(oaiIdentifier);
920
921 // if there are any oaiInfo metadata, add them in too.
922 Element info = (Element)GSXML.getChildByTagName(oai_config, OAIXML.OAI_INFO);
923 if (info != null) {
924 NodeList meta = GSXML.getChildrenByTagName(info, OAIXML.METADATA);
925 if (meta != null && meta.getLength() > 0) {
926 Element gsdl = OAIXML.createGSDLElement(doc);
927 description.appendChild(gsdl);
928 for (int m = 0; m<meta.getLength(); m++) {
929 copyNode(gsdl, meta.item(m));
930 }
931
932 }
933 }
934 this.identify_response = identify;
935 return getMessage(doc, identify);
936 }
937 //split setSpec (site_name:coll_name) into an array of strings
938 //It has already been checked that the set_spec contains at least one ':'
939 private String[] splitSetSpec(String set_spec) {
940 logger.info(set_spec);
941 String[] strs = new String[2];
942 int colon_index = set_spec.indexOf(":");
943 strs[0] = set_spec.substring(0, colon_index);
944 strs[1] = set_spec.substring(colon_index + 1);
945 return strs;
946 }
947 /** split the identifier into <collection + OID> as an array
948 It has already been checked that the 'identifier' contains at least one ':'
949 */
950 private String[] splitNames(String identifier) {
951 logger.info(identifier);
952 String [] strs = new String[2];
953 int first_colon = identifier.indexOf(":");
954 if(first_colon == -1) {
955 return null;
956 }
957 strs[0] = identifier.substring(0, first_colon);
958 strs[1] = identifier.substring(first_colon + 1);
959 return strs;
960 }
961 /** validate if the specified metadata prefix value is supported by the repository
962 * by checking it in the OAIConfig.xml
963 */
964 private boolean repositorySupportsMetadataPrefix(String prefix_value) {
965 NodeList prefix_list = oai_config.getElementsByTagName(OAIXML.METADATA_PREFIX);
966
967 for(int i=0; i<prefix_list.getLength(); i++) {
968 if(prefix_value.equals(GSXML.getNodeText((Element)prefix_list.item(i)).trim() )) {
969 return true;
970 }
971 }
972 return false;
973 }
974 private Element doGetRecord(Element req){
975 logger.info("");
976 /** arguments:
977 identifier: required
978 metadataPrefix: required
979 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
980 */
981 Document doc = XMLConverter.newDOM();
982 Element get_record = doc.createElement(OAIXML.GET_RECORD);
983
984 HashSet<String> valid_strs = new HashSet<String>();
985 valid_strs.add(OAIXML.IDENTIFIER);
986 valid_strs.add(OAIXML.METADATA_PREFIX);
987
988 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
989 HashMap<String, String> param_map = GSXML.getParamMap(params);
990
991 if(!areAllParamsValid(param_map, valid_strs) ||
992 params.getLength() == 0 ||
993 param_map.containsKey(OAIXML.IDENTIFIER) == false ||
994 param_map.containsKey(OAIXML.METADATA_PREFIX) == false ) {
995 logger.error("must have the metadataPrefix/identifier parameter.");
996 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
997 }
998
999 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
1000 String identifier = param_map.get(OAIXML.IDENTIFIER);
1001
1002 // verify the metadata prefix
1003 if (repositorySupportsMetadataPrefix(prefix) == false) {
1004 logger.error("requested prefix is not found in OAIConfig.xml");
1005 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
1006 }
1007
1008 // get the names
1009 String[] strs = splitNames(identifier);
1010 if(strs == null || strs.length < 2) {
1011 logger.error("identifier is not in the form coll:id" + identifier);
1012 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
1013 }
1014 //String name_of_site = strs[0];
1015 String coll_name = strs[0];
1016 String oid = strs[1];
1017
1018 //re-organize the request element
1019 // reset the 'to' attribute
1020 String verb = req.getAttribute(GSXML.TO_ATT);
1021 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
1022 // reset the identifier element
1023 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.IDENTIFIER);
1024 if (param != null) {
1025 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
1026 param.setAttribute(GSXML.VALUE_ATT, oid);
1027 }
1028
1029 //Now send the request to the message router to process
1030 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
1031 msg.appendChild(doc.importNode(req, true));
1032 Node result_node = mr.process(msg);
1033 return GSXML.nodeToElement(result_node);
1034 }
1035
1036 // See OAIConfig.xml
1037 // dynamically works out what the earliestDateStamp is, since it varies by collection
1038 // returns this time in *milliseconds*.
1039 protected long getEarliestDateStamp(Element oai_coll_list) {
1040 // config earliest datstamp
1041 long config_datestamp = 0;
1042 Element config_datestamp_elem = (Element)GSXML.getChildByTagName(this.oai_config, OAIXML.EARLIEST_DATESTAMP);
1043 if (config_datestamp_elem != null) {
1044 String datest = GSXML.getNodeText(config_datestamp_elem);
1045 config_datestamp = OAIXML.getTime(datest);
1046 if (config_datestamp == -1) {
1047 config_datestamp = 0;
1048 }
1049 }
1050 //do the earliestDatestamp
1051 long current_time = System.currentTimeMillis();
1052 long earliestDatestamp = current_time;
1053 NodeList oai_coll = oai_coll_list.getElementsByTagName(GSXML.COLLECTION_ELEM);
1054 int oai_coll_size = oai_coll.getLength();
1055 if (oai_coll_size == 0) {
1056 logger.info("returned oai collection list is empty. Setting repository earliestDatestamp to be the earliest datestamp from OAIConfig.xml, or 1970-01-01 if not specified.");
1057 return config_datestamp;
1058 }
1059 // the earliestDatestamp is now stored as a metadata element in the collection's buildConfig.xml file
1060 // we get the earliestDatestamp among the collections
1061 for(int i=0; i<oai_coll_size; i++) {
1062 long coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.EARLIEST_DATESTAMP));
1063 if (coll_earliestDatestamp == 0) {
1064 // try last modified
1065 coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.LAST_MODIFIED));
1066 }
1067 if (coll_earliestDatestamp > 0) {
1068 earliestDatestamp = (earliestDatestamp > coll_earliestDatestamp)? coll_earliestDatestamp : earliestDatestamp;
1069 }
1070 }
1071 if (earliestDatestamp == current_time) {
1072 logger.info("no collection had a real datestamp, using value from OAIConfig");
1073 return config_datestamp;
1074 }
1075 return earliestDatestamp;
1076 }
1077
1078 private boolean collectionsChangedSinceTime(String set_spec_str, long initial_time) {
1079
1080 // we need to look though all collections in the set to see if any have last modified dates > initial_time
1081 Vector<String> set_coll_list = getCollectionListForSet(set_spec_str);
1082
1083 Node child = this.collection_list.getFirstChild();
1084 while (child != null) {
1085 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
1086 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
1087 if (set_coll_list.contains(coll_id)) {
1088 long last_modified = Long.parseLong(((Element)child).getAttribute(OAIXML.LAST_MODIFIED));
1089 if (initial_time < last_modified) {
1090 return true;
1091 }
1092 }
1093 }
1094 child = child.getNextSibling();
1095 }
1096 return false;
1097
1098 }
1099
1100}
1101
1102
Note: See TracBrowser for help on using the repository browser.