source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/core/OAIReceptionist.java@ 28987

Last change on this file since 28987 was 28987, checked in by kjdon, 10 years ago

Some tidying up. Have started work on the new metadata format stuff, but it's not finished yet.

File size: 42.1 KB
Line 
1/*
2 * OAIReceptionist.java
3 * Copyright (C) 2012 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20package org.greenstone.gsdl3.core;
21
22import org.greenstone.gsdl3.util.*;
23import org.greenstone.gsdl3.action.*;
24// XML classes
25import org.w3c.dom.Node;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29
30// other java classes
31import java.io.File;
32import java.util.*;
33
34import org.apache.log4j.*;
35
36/** a Receptionist, used for oai metadata response xml generation.
37 * This receptionist talks to the message router directly,
38 * instead of via any action, hence no action map is needed.
39 * @see the basic Receptionist
40 */
41public class OAIReceptionist implements ModuleInterface {
42
/** Logger shared by all instances of this receptionist. */
static Logger logger = Logger.getLogger(org.greenstone.gsdl3.core.OAIReceptionist.class.getName());

/** Instead of a config_params object, only a site_name is needed by the OAI receptionist. */
protected String site_name = null;
/** The unique repository identifier, read from the OAI config during configure(). */
protected String repository_id = null;

/** The configuration element of this receptionist, passed in from the OAI servlet. */
protected Element oai_config = null;

/**
 * Maximum number of records returned per response before a resumptionToken
 * is issued. Read from the OAI config; stays at -1 when the config has no
 * such element, in which case responses are never split.
 */
protected int resume_after = -1 ;

/** The message router that the receptionist talks to. */
protected ModuleInterface mr = null;

// Some of the data/responses will not change while the servlet is running, so
// we can cache them

/** A list of all the collections available to this OAI server */
protected Element collection_list = null;
/** A vector of the collection names, for convenience */
protected Vector<String> collection_name_list = null;
/** If this is true, then there are no OAI enabled collections, so can always return noRecordsMatch (after validating the request params) */
protected boolean noRecordsMatch = false;

/** A set of all known setSpecs (collections, sub sets and super sets) */
protected HashSet<String> set_set = null;

/** True when at least one usable super set is defined in the OAI config. */
protected boolean has_super_colls = false;
/** A map of super set spec -> names of the collections belonging to it */
protected HashMap<String, Vector<String>> super_coll_map = null;
/** A map of super set spec -> its config element, stored for convenience */
HashMap<String, Element> super_coll_data = null;
// TODO: decide whether the metadata formats should also be cached here
/** The identify response */
protected Element identify_response = null;
/** The list sets response, composed when the set info is configured */
protected Element listsets_response = null;
/** The repository-wide list metadata formats response, built on first use */
protected Element listmetadataformats_response = null;
84
85 public OAIReceptionist() {
86
87 }
88
89 public void cleanUp() {
90 if (this.mr != null) {
91
92 this.mr.cleanUp();
93 }
94 OAIResumptionToken.saveTokensToFile();
95 }
96
97 public void setSiteName(String site_name) {
98 this.site_name = site_name;
99 }
100 /** sets the message router - it should already be created and
101 * configured in the init() of a servlet (OAIServer, for example) before being passed to the receptionist*/
102 public void setMessageRouter(ModuleInterface mr) {
103 this.mr = mr;
104 }
105
106 /** configures the receptionist */
107 public boolean configure(Element config) {
108
109 if (this.mr==null) {
110 logger.error(" message routers must be set before calling oai configure");
111 return false;
112 }
113 if (config == null) {
114 logger.error(" oai configure file is null");
115 return false;
116 }
117 oai_config = config;
118 resume_after = getResumeAfter();
119
120 repository_id = getRepositoryIdentifier();
121 configureSuperSetInfo();
122 if (!configureSetInfo()) {
123 // there are no sets
124 logger.error("No sets (collections) available for OAI");
125 return false;
126 }
127
128 //clear out expired resumption tokens stored in OAIResumptionToken.xml
129 OAIResumptionToken.init();
130 OAIResumptionToken.clearExpiredTokens();
131
132 return true;
133 }
134
135 // assuming that sets are static. If collections change then the servlet
136 // should be restarted.
137 private boolean configureSuperSetInfo() {
138 // do we have any super colls listed in web/WEB-INF/classes/OAIConfig.xml?
139 // Will be like
140 // <oaiSuperSet>
141 // <SetSpec>xxx</SetSpec>
142 // <setName>xxx</SetName>
143 // <SetDescription>xxx</setDescription>
144 // </oaiSuperSet>
145 // The super set is listed in OAIConfig, and collections themselves state
146 // whether they are part of the super set or not.
147 NodeList super_coll_list = this.oai_config.getElementsByTagName(OAIXML.OAI_SUPER_SET);
148 this.super_coll_data = new HashMap<String, Element>();
149 if (super_coll_list.getLength() > 0) {
150 this.has_super_colls = true;
151 for (int i=0; i<super_coll_list.getLength(); i++) {
152 Element super_coll = (Element)super_coll_list.item(i);
153 Element set_spec = (Element)GSXML.getChildByTagName(super_coll, OAIXML.SET_SPEC);
154 if (set_spec != null) {
155 String name = GSXML.getNodeText(set_spec);
156 if (!name.equals("")) {
157 this.super_coll_data.put(name, super_coll);
158 logger.error("adding in super coll "+name);
159 }
160 }
161 }
162
163 if (this.super_coll_data.size()==0) {
164 this.has_super_colls = false;
165 }
166 }
167 if (this.has_super_colls == true) {
168 this.super_coll_map = new HashMap<String, Vector<String>>();
169 }
170 return true;
171
172 }
173 private boolean configureSetInfo() {
174 this.set_set = new HashSet<String>();
175
176 // First, we get a list of all the OAI enabled collections
177 // We get this by sending a listSets request to the MR
178 Document doc = XMLConverter.newDOM();
179 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
180
181 Element request = GSXML.createBasicRequest(doc, OAIXML.OAI_SET_LIST, "", null);
182 message.appendChild(request);
183 Node msg_node = mr.process(message);
184
185 if (msg_node == null) {
186 logger.error("returned msg_node from mr is null");
187 return false;
188 }
189 Element resp = (Element)GSXML.getChildByTagName(msg_node, GSXML.RESPONSE_ELEM);
190 Element coll_list = (Element)GSXML.getChildByTagName(resp, GSXML.COLLECTION_ELEM + GSXML.LIST_MODIFIER);
191 if (coll_list == null) {
192 logger.error("coll_list is null");
193 return false;
194 }
195
196 this.collection_list = (Element)doc.importNode(coll_list, true);
197
198 // go through and store a list of collection names for convenience
199 // also create a 'to' attribute for the next request to the MR, which
200 // is a ListSets request to each collection
201 Node child = this.collection_list.getFirstChild();
202 if (child == null) {
203 logger.error("collection list has no children");
204 noRecordsMatch = true;
205 return false;
206 }
207
208 this.collection_name_list = new Vector<String>();
209 StringBuffer to = new StringBuffer();
210 boolean first = true;
211 while (child != null) {
212 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
213 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
214 this.collection_name_list.add(coll_id);
215 if (!first) {
216 to.append(',');
217 }
218 first = false;
219 to.append(coll_id+"/"+OAIXML.LIST_SETS);
220 }
221 child = child.getNextSibling();
222 }
223 if (first) {
224 // we haven't found any collections
225 logger.error("found no collection elements in collectionList");
226 noRecordsMatch = true;
227 return false;
228 }
229 Document listsets_doc = XMLConverter.newDOM();
230 Element listsets_element = listsets_doc.createElement(OAIXML.LIST_SETS);
231 this.listsets_response = getMessage(listsets_doc, listsets_element);
232
233 // Now, for each collection, get a list of all its sets
234 // might include subsets (classifiers) or super colls
235 // We'll reuse the first message, changing its type and to atts
236 request.setAttribute(GSXML.TYPE_ATT, "");
237 request.setAttribute(GSXML.TO_ATT, to.toString());
238 // send to MR
239 msg_node = mr.process(message);
240 logger.error(XMLConverter.getPrettyString(msg_node));
241 NodeList response_list = ((Element)msg_node).getElementsByTagName(GSXML.RESPONSE_ELEM);
242 for (int c=0; c<response_list.getLength(); c++) {
243 // for each collection's response
244 Element response = (Element)response_list.item(c);
245 String coll_name = GSPath.getFirstLink(response.getAttribute(GSXML.FROM_ATT));
246 logger.error("coll from response "+coll_name);
247 NodeList set_list = response.getElementsByTagName(OAIXML.SET);
248 for (int j=0; j<set_list.getLength(); j++) {
249 // now check if it a super collection
250 Element set = (Element)set_list.item(j);
251 String set_spec = GSXML.getNodeText((Element)GSXML.getChildByTagName(set, OAIXML.SET_SPEC));
252 logger.error("set spec = "+set_spec);
253 // this may change if we add site name back in
254 // setSpecs will be collname or collname:subset or supercollname
255 if (set_spec.indexOf(":")==-1 && ! set_spec.equals(coll_name)) {
256 // it must be a super coll spec
257 logger.error("found super coll, "+set_spec);
258 // check that it is a valid one from config
259 if (this.has_super_colls == true && this.super_coll_data.containsKey(set_spec)) {
260 Vector <String> subcolls = this.super_coll_map.get(set_spec);
261 if (subcolls == null) {
262 logger.error("its new!!");
263 // not in there yet
264 subcolls = new Vector<String>();
265 this.set_set.add(set_spec);
266 this.super_coll_map.put(set_spec, subcolls);
267 // the first time a supercoll is mentioned, add into the set list
268 logger.error("finding the set info "+XMLConverter.getPrettyString(this.super_coll_data.get(set_spec)));
269 listsets_element.appendChild(GSXML.duplicateWithNewName(listsets_doc, this.super_coll_data.get(set_spec), OAIXML.SET, true));
270 }
271 // add this collection to the list for the super coll
272 subcolls.add(coll_name);
273 }
274 } else { // its either the coll itself or a subcoll
275 // add in the set
276 listsets_element.appendChild(listsets_doc.importNode(set, true));
277 this.set_set.add(set_spec);
278 }
279 } // for each set in the collection
280 } // for each OAI enabled collection
281 return true;
282 }
283
284 protected void resetMessageRouter() {
285 // we just need to send a configure request to MR
286 Document doc = XMLConverter.newDOM();
287 Element mr_request_message = doc.createElement(GSXML.MESSAGE_ELEM);
288 Element mr_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_SYSTEM, "", null);
289 mr_request_message.appendChild(mr_request);
290
291 Element system = doc.createElement(GSXML.SYSTEM_ELEM);
292 mr_request.appendChild(system);
293 system.setAttribute(GSXML.TYPE_ATT, GSXML.SYSTEM_TYPE_CONFIGURE);
294
295 Element response = (Element) this.mr.process(mr_request_message);
296 logger.error("configure response = "+XMLConverter.getPrettyString(response));
297 }
298 /** process using strings - just calls process using Elements */
299 public String process(String xml_in) {
300
301 Node message_node = XMLConverter.getDOM(xml_in);
302 Node page = process(message_node);
303 return XMLConverter.getString(page);
304 }
305
306 //Compose a message/response element used to send back to the OAIServer servlet.
307 //This method is only used within OAIReceptionist
308 private Element getMessage(Document doc, Element e) {
309 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
310 Element response = doc.createElement(GSXML.RESPONSE_ELEM);
311 msg.appendChild(response);
312 response.appendChild(e);
313 return msg;
314 }
315
316 /** process - produce xml data in response to a request
317 * if something goes wrong, it returns null -
318 */
319 public Node process(Node message_node) {
320 logger.error("OAIReceptionist received request");
321
322 Element message = GSXML.nodeToElement(message_node);
323 logger.error(XMLConverter.getString(message));
324
325 // check that its a correct message tag
326 if (!message.getTagName().equals(GSXML.MESSAGE_ELEM)) {
327 logger.error(" Invalid message. GSDL message should start with <"+GSXML.MESSAGE_ELEM+">, instead it starts with:"+message.getTagName()+".");
328 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
329 }
330
331 // get the request out of the message - assume that there is only one
332 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
333 if (request == null) {
334 logger.error(" message had no request!");
335 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
336 }
337
338 // special case, reset=true for reloading the MR and recept data
339 String reset = request.getAttribute("reset");
340 if (!reset.equals("")) {
341 resetMessageRouter();
342 configureSetInfo();
343 return OAIXML.createResetResponse(true);
344 }
345
346
347 //At this stage, the value of 'to' attribute of the request must be the 'verb'
348 //The only thing that the oai receptionist can be sure is that these verbs are valid, nothing else.
349 String verb = request.getAttribute(GSXML.TO_ATT);
350 if (verb.equals(OAIXML.IDENTIFY)) {
351 return doIdentify();
352 }
353 if (verb.equals(OAIXML.LIST_METADATA_FORMATS)) {
354 return doListMetadataFormats(request);
355 }
356 if (verb.equals(OAIXML.LIST_SETS)) {
357 // we have composed the list sets response on init
358 // Note this means that list sets never uses resumption tokens
359 return this.listsets_response;
360 }
361 if (verb.equals(OAIXML.GET_RECORD)) {
362 return doGetRecord(request);
363 }
364 if (verb.equals(OAIXML.LIST_IDENTIFIERS)) {
365 return doListIdentifiersOrRecords(request,OAIXML.LIST_IDENTIFIERS , OAIXML.HEADER);
366 }
367 if (verb.equals(OAIXML.LIST_RECORDS)) {
368 return doListIdentifiersOrRecords(request, OAIXML.LIST_RECORDS, OAIXML.RECORD);
369 }
370 // should never get here as verbs were checked in OAIServer
371 return OAIXML.createErrorMessage(OAIXML.BAD_VERB, "Unexpected things happened");
372
373 }
374
375
376 private int getResumeAfter() {
377 Element resume_after = (Element)GSXML.getChildByTagName(oai_config, OAIXML.RESUME_AFTER);
378 if(resume_after != null) return Integer.parseInt(GSXML.getNodeText(resume_after));
379 return -1;
380 }
381 private String getRepositoryIdentifier() {
382 Element ri = (Element)GSXML.getChildByTagName(oai_config, OAIXML.REPOSITORY_IDENTIFIER);
383 if (ri != null) {
384 return GSXML.getNodeText(ri);
385 }
386 return "";
387 }
388
389
390 /** if the param_map contains strings other than those in valid_strs, return false;
391 * otherwise true.
392 */
393 private boolean areAllParamsValid(HashMap<String, String> param_map, HashSet<String> valid_strs) {
394 ArrayList<String> param_list = new ArrayList<String>(param_map.keySet());
395 for(int i=0; i<param_list.size(); i++) {
396 logger.error("param, key = "+param_list.get(i)+", value = "+param_map.get(param_list.get(i)));
397 if (valid_strs.contains(param_list.get(i)) == false) {
398 return false;
399 }
400 }
401 return true;
402 }
403
404 private Element doListIdentifiersOrRecords(Element req, String verb, String record_type) {
405 // options: from, until, set, metadataPrefix, resumptionToken
406 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
407 HashSet<String> valid_strs = new HashSet<String>();
408 valid_strs.add(OAIXML.FROM);
409 valid_strs.add(OAIXML.UNTIL);
410 valid_strs.add(OAIXML.SET);
411 valid_strs.add(OAIXML.METADATA_PREFIX);
412 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
413
414 Document result_doc = XMLConverter.newDOM();
415 Element result_element = result_doc.createElement(verb);
416 boolean result_token_needed = false; // does this result need to include a
417 // resumption token
418
419 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
420
421 HashMap<String, String> param_map = GSXML.getParamMap(params);
422
423 // are all the params valid?
424 if (!areAllParamsValid(param_map, valid_strs)) {
425 logger.error("One of the params is invalid");
426 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "There was an invalid parameter");
427 // TODO, need to tell the user which one was invalid ??
428 }
429
430 // Do we have a resumption token??
431 String token = null;
432 String from = null;
433 String until = null;
434 boolean set_requested = false;
435 String set_spec_str = null;
436 String prefix_value = null;
437 int cursor = 0;
438 int current_cursor = 0;
439 String current_set = null;
440 long initial_time = 0;
441
442 int total_size = -1; // we are only going to set this in resumption
443 // token if it is easy to work out, i.e. not sending extra requests to
444 // MR just to calculate total size
445
446 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
447 // Is it an error to have other arguments? Do we need to check to make sure that resumptionToken is the only arg??
448 // validate resumptionToken
449 token = param_map.get(OAIXML.RESUMPTION_TOKEN);
450 logger.info("has resumptionToken " + token);
451 if(OAIResumptionToken.isValidToken(token) == false) {
452 logger.error("token is not valid");
453 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "");
454 }
455 result_token_needed = true; // we always need to send a token back if we have started with one. It may be empty if we are returning the end of the list
456 // initialise the request params from the stored token data
457 HashMap<String, String> token_data = OAIResumptionToken.getTokenData(token);
458 from = token_data.get(OAIXML.FROM);
459 until = token_data.get(OAIXML.UNTIL);
460 set_spec_str = token_data.get(OAIXML.SET);
461 if (set_spec_str != null) {
462 set_requested = true;
463 }
464 prefix_value = token_data.get(OAIXML.METADATA_PREFIX);
465 current_set = token_data.get(OAIResumptionToken.CURRENT_SET);
466 try {
467 cursor = Integer.parseInt(token_data.get(OAIXML.CURSOR));
468 cursor = cursor + resume_after; // increment cursor
469 current_cursor = Integer.parseInt(token_data.get(OAIResumptionToken.CURRENT_CURSOR));
470 initial_time = Long.parseLong(token_data.get(OAIResumptionToken.INITIAL_TIME));
471 } catch (NumberFormatException e) {
472 logger.error("tried to parse int from cursor data and failed");
473 }
474
475 // check that the collections/sets haven't changed since the token was issued
476 if (collectionsChangedSinceTime(set_spec_str, initial_time)) {
477 logger.error("one of the collections in set "+set_spec_str+" has changed since token issued. Expiring the token");
478 OAIResumptionToken.expireToken(token);
479 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "Repository data has changed since this token was issued. Resend original request");
480 }
481 }
482 else {
483 // no resumption token, lets check the other params
484 // there must be a metadataPrefix
485 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
486 logger.error("metadataPrefix param required");
487 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "metadataPrefix param required");
488 }
489
490 //if there are any date params, check they're of the right format
491 from = param_map.get(OAIXML.FROM);
492 if(from != null) {
493 Date from_date = OAIXML.getDate(from);
494 if(from_date == null) {
495 logger.error("invalid date: " + from);
496 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.FROM);
497 }
498 }
499 until = param_map.get(OAIXML.UNTIL);
500 if(until != null) {
501 Date until_date = OAIXML.getDate(until);
502 if(until_date == null) {
503 logger.error("invalid date: " + until);
504 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.UNTIL);
505 }
506 }
507 if(from != null && until != null) { // check they are of the same date-time format (granularity)
508 if(from.length() != until.length()) {
509 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
510 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "The request has different granularities (date-time formats) for the From and Until date parameters.");
511 }
512 }
513
514 // check the set arg is a set we know about
515 set_requested = param_map.containsKey(OAIXML.SET);
516 set_spec_str = null;
517 if(set_requested == true) {
518 set_spec_str = param_map.get(OAIXML.SET);
519 if (!this.set_set.contains(set_spec_str)) {
520 // the set is not one we know about
521 logger.error("requested set is not found in this repository");
522 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid set parameter");
523
524 }
525 }
526 // Is the metadataPrefix arg one this repository supports?
527 prefix_value = param_map.get(OAIXML.METADATA_PREFIX);
528 if (repositorySupportsMetadataPrefix(prefix_value) == false) {
529 logger.error("requested metadataPrefix is not found in OAIConfig.xml");
530 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "metadata format "+prefix_value+" not supported by this repository");
531 }
532
533 } // else no resumption token, check other params
534
535 // Whew. Now we have validated the params, we can work on doing the actual
536 // request
537
538
539 Document doc = XMLConverter.newDOM();
540 Element mr_msg = doc.createElement(GSXML.MESSAGE_ELEM);
541 Element mr_req = doc.createElement(GSXML.REQUEST_ELEM);
542 // TODO does this need a type???
543 mr_msg.appendChild(mr_req);
544
545 // copy in the from/until params if there
546 if (from != null) {
547 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.FROM, from));
548 }
549 if (until != null) {
550 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.UNTIL, until));
551 }
552 // add metadataPrefix
553 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.METADATA_PREFIX, prefix_value));
554
555 // do we have a set???
556 // if no set, we send to all collections in the collection list
557 // if super set, we send to all collections in super set list
558 // if a single collection, send to it
559 // if a subset, send to the collection
560 Vector<String> current_coll_list = getCollectionListForSet(set_spec_str);
561 boolean single_collection = false;
562 if (current_coll_list.size() == 1) {
563 single_collection = true;
564 }
565 if (set_spec_str != null && set_spec_str.indexOf(":") != -1) {
566 // we have a subset - add the set param back in
567 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.SET, set_spec_str));
568 }
569
570 int num_collected_records = 0;
571 int start_point = current_cursor; // may not be 0 if we are using a resumption token
572 String resumption_collection = "";
573 boolean empty_result_token = false; // if we are sending the last part of a list, then the token value will be empty
574
575 // iterate through the list of collections and send the request to each
576
577 int start_coll=0;
578 if (current_set != null) {
579 // we are resuming a previous request, need to locate the first collection
580 for (int i=0; i<current_coll_list.size(); i++) {
581 if (current_set.equals(current_coll_list.get(i))) {
582 start_coll = i;
583 break;
584 }
585 }
586 }
587
588 for (int i=start_coll; i<current_coll_list.size(); i++) {
589 String current_coll = current_coll_list.get(i);
590 mr_req.setAttribute(GSXML.TO_ATT, current_coll+"/"+verb);
591
592 Element result = (Element)mr.process(mr_msg);
593 logger.error(verb+ " result for coll "+current_coll);
594 logger.error(XMLConverter.getPrettyString(result));
595 if (result == null) {
596 logger.info("message router returns null");
597 // do what??? carry on? fail??
598 return OAIXML.createErrorMessage("Internal service returns null", "");
599 }
600 Element res = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
601 if(res == null) {
602 logger.info("response element in xml_result is null");
603 return OAIXML.createErrorMessage("Internal service returns null", "");
604 }
605 NodeList record_list = res.getElementsByTagName(record_type);
606 int num_records = record_list.getLength();
607 if(num_records == 0) {
608 logger.info("message router returns 0 records for coll "+current_coll);
609 continue; // try the next collection
610 }
611 if (single_collection) {
612 total_size = num_records;
613 }
614 int records_to_add = (resume_after > 0 ? resume_after - num_collected_records : num_records);
615 if (records_to_add > (num_records-start_point)) {
616 records_to_add = num_records-start_point;
617 }
618 addRecordsToList(result_doc, result_element, record_list, start_point, records_to_add);
619 num_collected_records += records_to_add;
620
621 // do we need to stop here, and do we need to issue a resumption token?
622 if (resume_after > 0 && num_collected_records == resume_after) {
623 // we have finished collecting records at the moment.
624 // but are we conincidentally at the end? or are there more to go?
625 if (records_to_add < (num_records - start_point)) {
626 // we have added less than this collection had
627 start_point += records_to_add;
628 resumption_collection = current_coll;
629 result_token_needed = true;
630 }
631 else {
632 // we added all this collection had to offer
633 // is there another collection in the list??
634 if (i<current_coll_list.size()-1) {
635 result_token_needed = true;
636 start_point = 0;
637 resumption_collection = current_coll_list.get(i+1);
638 }
639 else {
640 // we have finished one collection and there are no more collection
641 // if we need to send a resumption token (in this case, only because we started with one, then it will be empty
642 logger.error("at end of list, need empty result token");
643 empty_result_token = true;
644 }
645 }
646 break;
647 }
648 start_point = 0; // only the first one will have start non-zero, if we
649 // have a resumption token
650
651 } // for each collection
652
653 if (num_collected_records ==0) {
654 // there were no matching results
655 return OAIXML.createErrorMessage(OAIXML.NO_RECORDS_MATCH, "");
656 }
657
658 if (num_collected_records < resume_after) {
659 // we have been through all collections, and there are no more
660 // if we need a result token - only because we started with one, so we need to send an empty one, then make sure everyone knows we are just sending an empty one
661 if (result_token_needed) {
662 empty_result_token = true;
663 }
664 }
665
666 if (result_token_needed) {
667 // we need a resumption token
668 if (empty_result_token) {
669 logger.error("have empty result token");
670 token = "";
671 } else {
672 if (token != null) {
673 // we had a token for this request, we can just update it
674 token = OAIResumptionToken.updateToken(token, ""+cursor, resumption_collection, ""+start_point);
675 } else {
676 // we are generating a new one
677 token = OAIResumptionToken.createAndStoreResumptionToken(set_spec_str, prefix_value, from, until, ""+cursor, resumption_collection, ""+start_point );
678 }
679 }
680
681 // result token XML
682 long expiration_date = -1;
683 if (empty_result_token) {
684 // we know how many records in total as we have sent them all
685 total_size = cursor+num_collected_records;
686 } else {
687 // non-empty token, set the expiration date
688 expiration_date = OAIResumptionToken.getExpirationDate(token);
689 }
690 Element token_elem = OAIXML.createResumptionTokenElement(result_doc, token, total_size, cursor, expiration_date);
691 // OAIXML.addToken(token_elem); // store it
692 result_element.appendChild(token_elem); // add to the result
693 }
694
695
696 return getMessage(result_doc, result_element);
697 }
698
699 private Vector<String> getCollectionListForSet(String set) {
700 if (set == null) {
701 // no set requested, need the complete collection list
702 return this.collection_name_list;
703 }
704 if (has_super_colls && super_coll_map.containsKey(set)) {
705 return super_coll_map.get(set);
706 }
707
708 Vector<String> coll_list = new Vector<String>();
709 if (set.indexOf(":") != -1) {
710 String col_name = set.substring(0, set.indexOf(":"));
711 coll_list.add(col_name);
712 }
713 else {
714 coll_list.add(set);
715 }
716 return coll_list;
717 }
718 private void addRecordsToList(Document doc, Element result_element, NodeList
719 record_list, int start_point, int num_records) {
720 int end_point = start_point + num_records;
721 for (int i=start_point; i<end_point; i++) {
722 result_element.appendChild(doc.importNode(record_list.item(i), true));
723 }
724 }
725
726 private Element collectAll(Element result, Element msg, String verb, String elem_name) {
727 if(result == null) {
728 //in the first round, result is null
729 return msg;
730 }
731 Element res_in_result = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
732 if(res_in_result == null) { // return the results of all other collections accumulated so far
733 return msg;
734 }
735 Element verb_elem = (Element)GSXML.getChildByTagName(res_in_result, verb);
736 if(msg == null) {
737 return result;
738 }
739
740 //e.g., get all <record> elements from the returned message. There may be none of
741 //such element, for example, the collection service returned an error message
742 NodeList elem_list = msg.getElementsByTagName(elem_name);
743
744 for (int i=0; i<elem_list.getLength(); i++) {
745 verb_elem.appendChild(res_in_result.getOwnerDocument().importNode(elem_list.item(i), true));
746 }
747 return result;
748 }
749
750
751 /** there are three possible exception conditions: bad argument, idDoesNotExist, and noMetadataFormat.
752 * The first one is handled here, and the last two are processed by OAIPMH.
753 */
754 private Element doListMetadataFormats(Element req) {
755 //if the verb is ListMetadataFormats, there could be only one parameter: identifier
756 //, or there is no parameter; otherwise it is an error
757 //logger.info("" + XMLConverter.getString(msg));
758
759 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
760 Element param = null;
761 Document lmf_doc = XMLConverter.newDOM();
762 if(params.getLength() == 0) {
763 //this is requesting metadata formats for the whole repository
764 //read the oaiConfig.xml file, return the metadata formats specified there.
765 if (this.listmetadataformats_response != null) {
766 // we have already created it
767 return this.listmetadataformats_response;
768 }
769
770 Element list_metadata_formats = lmf_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
771 // get all the formats out of oai_config
772 NodeList formats = oai_config.getElementsByTagName(OAIXML.METADATA_FORMAT);
773 if (formats.getLength() ==0) {
774 logger.error("OAIConfig.xml must contain the supported metadata formats");
775 // TODO this is internal error, what to do???
776 return getMessage(lmf_doc, list_metadata_formats);
777 }
778
779 for(int i=0; i<formats.getLength(); i++) {
780 Element meta_fmt = lmf_doc.createElement(OAIXML.METADATA_FORMAT);
781 Element first_meta_format = (Element)formats.item(i);
782 //the element also contains mappings, but we don't want them
783 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_PREFIX), true));
784 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.SCHEMA), true));
785 meta_fmt.appendChild(lmf_doc.importNode(GSXML.getChildByTagName(first_meta_format, OAIXML.METADATA_NAMESPACE), true));
786 list_metadata_formats.appendChild(meta_fmt);
787 }
788 this.listmetadataformats_response = getMessage(lmf_doc, list_metadata_formats);
789 return this.listmetadataformats_response;
790
791 }
792
793 if (params.getLength() > 1) {
794 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
795 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
796 }
797
798 // This is a request for the metadata of a particular item with an identifier
799 /**the request xml is in the form: <request>
800 * <param name=.../>
801 * </request>
802 *And there is a param element and one element only. (No paramList element in between).
803 */
804 param = (Element)params.item(0);
805 String param_name = param.getAttribute(GSXML.NAME_ATT);
806 String identifier = "";
807 if (!param_name.equals(OAIXML.IDENTIFIER)) {
808 //Bad argument
809 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
810 }
811
812 identifier = param.getAttribute(GSXML.VALUE_ATT);
813 // the identifier is in the form: <coll_name>:<OID>
814 // so it must contain at least one ':' characters
815 // (the oid itself may contain : chars)
816 String[] strs = identifier.split(":", 2);
817 if(strs.length != 2) {
818 logger.error("identifier is not in the form coll:id" + identifier);
819 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
820 }
821
822 // send request to message router
823 // get the names
824 String coll_name = strs[0];
825 String oid = strs[1];
826
827 Document msg_doc = XMLConverter.newDOM();
828 Element message = msg_doc.createElement(GSXML.MESSAGE_ELEM);
829 String verb = req.getAttribute(GSXML.TO_ATT);
830 String new_to = coll_name + "/" + verb;
831 Element request = GSXML.createBasicRequest(msg_doc, "oai???", new_to, null);
832 message.appendChild(request);
833 // add the id param
834 GSXML.addParameterToList(request, OAIXML.OID, oid);
835
836 //Now send the request to the message router to process
837 Node result_node = mr.process(message);
838 return GSXML.nodeToElement(result_node);
839 }
840
841
842
843
844 private void copyNamedElementfromConfig(Element to_elem, String element_name) {
845 Element original_element = (Element)GSXML.getChildByTagName(oai_config, element_name);
846 if(original_element != null) {
847 GSXML.copyNode(to_elem, original_element);
848 }
849 }
850
851
852 private Element doIdentify() {
853 //The validation for this verb has been done in OAIServer.validate(). So no bother here.
854 logger.info("");
855 if (this.identify_response != null) {
856 // we have already created it
857 return getMessage(this.identify_response.getOwnerDocument(), this.identify_response);
858 }
859 Document doc = XMLConverter.newDOM();
860 Element identify = doc.createElement(OAIXML.IDENTIFY);
861 //do the repository name
862 copyNamedElementfromConfig(identify, OAIXML.REPOSITORY_NAME);
863 //do the baseurl
864 copyNamedElementfromConfig(identify, OAIXML.BASE_URL);
865 //do the protocol version
866 copyNamedElementfromConfig(identify, OAIXML.PROTOCOL_VERSION);
867
868 //There can be more than one admin email according to the OAI specification
869 NodeList admin_emails = GSXML.getChildrenByTagName(oai_config, OAIXML.ADMIN_EMAIL);
870 int num_admin = 0;
871 Element from_admin_email = null;
872 if (admin_emails != null) {
873 num_admin = admin_emails.getLength();
874 }
875 for (int i=0; i<num_admin; i++) {
876 GSXML.copyNode(identify, admin_emails.item(i));
877 }
878
879 //do the earliestDatestamp
880 //send request to mr to search through the earliest datestamp amongst all oai collections in the repository.
881 //ask the message router for a list of oai collections
882 //NodeList oai_coll = getOAICollectionList();
883 long earliestDatestamp = getEarliestDateStamp(collection_list);
884 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
885 Element earliestDatestamp_elem = doc.createElement(OAIXML.EARLIEST_DATESTAMP);
886 GSXML.setNodeText(earliestDatestamp_elem, earliestDatestamp_str);
887 identify.appendChild(earliestDatestamp_elem);
888
889 //do the deletedRecord
890 copyNamedElementfromConfig(identify, OAIXML.DELETED_RECORD);
891 //do the granularity
892 copyNamedElementfromConfig(identify, OAIXML.GRANULARITY);
893
894 // output the oai identifier
895 Element description = doc.createElement(OAIXML.DESCRIPTION);
896 identify.appendChild(description);
897 // TODO, make this a valid id
898 Element oaiIdentifier = OAIXML.createOAIIdentifierXML(doc, repository_id, "lucene-jdbm-demo", "ec159e");
899 description.appendChild(oaiIdentifier);
900
901 // if there are any oaiInfo metadata, add them in too.
902 Element info = (Element)GSXML.getChildByTagName(oai_config, OAIXML.OAI_INFO);
903 if (info != null) {
904 NodeList meta = GSXML.getChildrenByTagName(info, OAIXML.METADATA);
905 if (meta != null && meta.getLength() > 0) {
906 Element gsdl = OAIXML.createGSDLElement(doc);
907 description.appendChild(gsdl);
908 for (int m = 0; m<meta.getLength(); m++) {
909 GSXML.copyNode(gsdl, meta.item(m));
910 }
911
912 }
913 }
914 this.identify_response = identify;
915 return getMessage(doc, identify);
916 }
917 /** split the identifier into <collection + OID> as an array
918 It has already been checked that the 'identifier' contains at least one ':'
919 */
920 // private String[] splitNames(String identifier) {
921 // logger.info(identifier);
922 // String [] strs = new String[2];
923 // int first_colon = identifier.indexOf(":");
924 // if(first_colon == -1) {
925 // return null;
926 // }
927 // strs[0] = identifier.substring(0, first_colon);
928 // strs[1] = identifier.substring(first_colon + 1);
929 // return strs;
930 // }
931 /** validate if the specified metadata prefix value is supported by the repository
932 * by checking it in the OAIConfig.xml
933 */
934 private boolean repositorySupportsMetadataPrefix(String prefix_value) {
935 NodeList prefix_list = oai_config.getElementsByTagName(OAIXML.METADATA_PREFIX);
936
937 for(int i=0; i<prefix_list.getLength(); i++) {
938 if(prefix_value.equals(GSXML.getNodeText((Element)prefix_list.item(i)).trim() )) {
939 return true;
940 }
941 }
942 return false;
943 }
944 private Element doGetRecord(Element req){
945 logger.info("");
946 /** arguments:
947 identifier: required
948 metadataPrefix: required
949 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
950 */
951 Document doc = XMLConverter.newDOM();
952 Element get_record = doc.createElement(OAIXML.GET_RECORD);
953
954 HashSet<String> valid_strs = new HashSet<String>();
955 valid_strs.add(OAIXML.IDENTIFIER);
956 valid_strs.add(OAIXML.METADATA_PREFIX);
957
958 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
959 HashMap<String, String> param_map = GSXML.getParamMap(params);
960
961 if(!areAllParamsValid(param_map, valid_strs) ||
962 params.getLength() == 0 ||
963 param_map.containsKey(OAIXML.IDENTIFIER) == false ||
964 param_map.containsKey(OAIXML.METADATA_PREFIX) == false ) {
965 logger.error("must have the metadataPrefix/identifier parameter.");
966 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
967 }
968
969 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
970 String identifier = param_map.get(OAIXML.IDENTIFIER);
971
972 // verify the metadata prefix
973 if (repositorySupportsMetadataPrefix(prefix) == false) {
974 logger.error("requested prefix is not found in OAIConfig.xml");
975 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
976 }
977
978 // get the names
979 String[] strs = identifier.split(":", 2);
980 if(strs == null || strs.length < 2) {
981 logger.error("identifier is not in the form coll:id" + identifier);
982 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
983 }
984 //String name_of_site = strs[0];
985 String coll_name = strs[0];
986 String oid = strs[1];
987
988 //re-organize the request element
989 // reset the 'to' attribute
990 String verb = req.getAttribute(GSXML.TO_ATT);
991 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
992 // reset the identifier element
993 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.IDENTIFIER);
994 if (param != null) {
995 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
996 param.setAttribute(GSXML.VALUE_ATT, oid);
997 }
998
999 //Now send the request to the message router to process
1000 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
1001 msg.appendChild(doc.importNode(req, true));
1002 Node result_node = mr.process(msg);
1003 return GSXML.nodeToElement(result_node);
1004 }
1005
1006 // See OAIConfig.xml
1007 // dynamically works out what the earliestDateStamp is, since it varies by collection
1008 // returns this time in *milliseconds*.
1009 protected long getEarliestDateStamp(Element oai_coll_list) {
1010 // config earliest datstamp
1011 long config_datestamp = 0;
1012 Element config_datestamp_elem = (Element)GSXML.getChildByTagName(this.oai_config, OAIXML.EARLIEST_DATESTAMP);
1013 if (config_datestamp_elem != null) {
1014 String datest = GSXML.getNodeText(config_datestamp_elem);
1015 config_datestamp = OAIXML.getTime(datest);
1016 if (config_datestamp == -1) {
1017 config_datestamp = 0;
1018 }
1019 }
1020 //do the earliestDatestamp
1021 long current_time = System.currentTimeMillis();
1022 long earliestDatestamp = current_time;
1023 NodeList oai_coll = oai_coll_list.getElementsByTagName(GSXML.COLLECTION_ELEM);
1024 int oai_coll_size = oai_coll.getLength();
1025 if (oai_coll_size == 0) {
1026 logger.info("returned oai collection list is empty. Setting repository earliestDatestamp to be the earliest datestamp from OAIConfig.xml, or 1970-01-01 if not specified.");
1027 return config_datestamp;
1028 }
1029 // the earliestDatestamp is now stored as a metadata element in the collection's buildConfig.xml file
1030 // we get the earliestDatestamp among the collections
1031 for(int i=0; i<oai_coll_size; i++) {
1032 long coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.EARLIEST_DATESTAMP));
1033 if (coll_earliestDatestamp == 0) {
1034 // try last modified
1035 coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.LAST_MODIFIED));
1036 }
1037 if (coll_earliestDatestamp > 0) {
1038 earliestDatestamp = (earliestDatestamp > coll_earliestDatestamp)? coll_earliestDatestamp : earliestDatestamp;
1039 }
1040 }
1041 if (earliestDatestamp == current_time) {
1042 logger.info("no collection had a real datestamp, using value from OAIConfig");
1043 return config_datestamp;
1044 }
1045 return earliestDatestamp;
1046 }
1047
1048 private boolean collectionsChangedSinceTime(String set_spec_str, long initial_time) {
1049
1050 // we need to look though all collections in the set to see if any have last modified dates > initial_time
1051 Vector<String> set_coll_list = getCollectionListForSet(set_spec_str);
1052
1053 Node child = this.collection_list.getFirstChild();
1054 while (child != null) {
1055 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
1056 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
1057 if (set_coll_list.contains(coll_id)) {
1058 long last_modified = Long.parseLong(((Element)child).getAttribute(OAIXML.LAST_MODIFIED));
1059 if (initial_time < last_modified) {
1060 return true;
1061 }
1062 }
1063 }
1064 child = child.getNextSibling();
1065 }
1066 return false;
1067
1068 }
1069
1070}
1071
1072
Note: See TracBrowser for help on using the repository browser.