source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/core/OAIReceptionist.java@ 29066

Last change on this file since 29066 was 29066, checked in by kjdon, 10 years ago

small code tidy

File size: 41.6 KB
RevLine 
[28857]1/*
2 * OAIReceptionist.java
3 * Copyright (C) 2012 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
[14211]20package org.greenstone.gsdl3.core;
21
22import org.greenstone.gsdl3.util.*;
23import org.greenstone.gsdl3.action.*;
24// XML classes
25import org.w3c.dom.Node;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29
30// other java classes
31import java.io.File;
32import java.util.*;
33
34import org.apache.log4j.*;
35
36/** a Receptionist, used for oai metadata response xml generation.
37 * This receptionist talks to the message router directly,
38 * instead of via any action, hence no action map is needed.
39 * @see the basic Receptionist
40 */
41public class OAIReceptionist implements ModuleInterface {
42
43 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.core.OAIReceptionist.class.getName());
44
45 /** Instead of a config_params object, only a site_name is needed by oai receptionist. */
46 protected String site_name = null;
[27672]47 /** The unique repository identifier, read from the OAI config in configure(); "" if the config does not declare one */
48 protected String repository_id = null;
[28987]49
[14211]50 /** the configuration element of this receptionist, passed in from the oai servlet via configure(). */
51 protected Element oai_config = null;
52
53 /** The number of records sent per response before a resumption token is issued. Read from the OAIXML.RESUME_AFTER element of the OAI config; stays at -1 when that element is absent, and values that are not greater than 0 mean all records are returned in one response. */
54 protected int resume_after = -1 ;
55
56 /** the message router that this receptionist talks to (set via setMessageRouter()) */
57 protected ModuleInterface mr = null;
58
[27672]59 // Some of the data/responses will not change while the servlet is running, so
60 // we can cache them
61
62 /** A list of all the collections available to this OAI server */
[28879]63 protected Element collection_list = null;
[28857]64 /** a vector of the collection names, for convenience */
65 protected Vector<String> collection_name_list = null;
66 /** If this is true, then there are no OAI enabled collections, so can always return noRecordsMatch (after validating the request params) */
67 protected boolean noRecordsMatch = false;
68
69 /** A set of all known 'sets' (the setSpec strings gathered in configureSetInfo()) */
70 protected HashSet<String> set_set = null;
[27672]71
 /** true when the OAI config declares at least one super set that has a non-empty setSpec */
[28857]72 protected boolean has_super_colls = false;
73 /** a hash of super set-> collection list; only created when has_super_colls is true */
74 protected HashMap<String, Vector<String>> super_coll_map = null;
[28885]75 /** store the super coll elements from the OAI config, keyed by setSpec, for convenience */
76 HashMap<String, Element> super_coll_data = null;
[28987]77 /** NOTE(review): leftover placeholder comment ("store the metadata formats") - no such field is declared here */
[27672]78 /** The identify response */
79 protected Element identify_response = null;
[28857]80 /** The list set response, built once in configureSetInfo() */
81 protected Element listsets_response = null;
82 /** the list metadata formats response */
83 protected Element listmetadataformats_response = null;
84
[14211]85 public OAIReceptionist() {
[28987]86
[14211]87 }
88
[28873]89 public void cleanUp() {
90 if (this.mr != null) {
91
92 this.mr.cleanUp();
93 }
94 OAIResumptionToken.saveTokensToFile();
95 }
[14211]96
97 public void setSiteName(String site_name) {
98 this.site_name = site_name;
99 }
100 /** sets the message router - it should already be created and
101 * configured in the init() of a servlet (OAIServer, for example) before being passed to the receptionist*/
102 public void setMessageRouter(ModuleInterface mr) {
103 this.mr = mr;
104 }
105
106 /** configures the receptionist */
107 public boolean configure(Element config) {
108
109 if (this.mr==null) {
110 logger.error(" message routers must be set before calling oai configure");
111 return false;
112 }
113 if (config == null) {
114 logger.error(" oai configure file is null");
115 return false;
116 }
117 oai_config = config;
118 resume_after = getResumeAfter();
119
[28857]120 repository_id = getRepositoryIdentifier();
[28885]121 configureSuperSetInfo();
[28857]122 if (!configureSetInfo()) {
123 // there are no sets
124 logger.error("No sets (collections) available for OAI");
125 return false;
126 }
[27672]127
[14211]128 //clear out expired resumption tokens stored in OAIResumptionToken.xml
[28857]129 OAIResumptionToken.init();
130 OAIResumptionToken.clearExpiredTokens();
[14211]131
132 return true;
133 }
[28857]134
135 // assuming that sets are static. If collections change then the servlet
136 // should be restarted.
[28885]137 private boolean configureSuperSetInfo() {
[28857]138 // do we have any super colls listed in web/WEB-INF/classes/OAIConfig.xml?
139 // Will be like
140 // <oaiSuperSet>
141 // <SetSpec>xxx</SetSpec>
142 // <setName>xxx</SetName>
143 // <SetDescription>xxx</setDescription>
144 // </oaiSuperSet>
145 // The super set is listed in OAIConfig, and collections themselves state
146 // whether they are part of the super set or not.
147 NodeList super_coll_list = this.oai_config.getElementsByTagName(OAIXML.OAI_SUPER_SET);
[28885]148 this.super_coll_data = new HashMap<String, Element>();
[28857]149 if (super_coll_list.getLength() > 0) {
150 this.has_super_colls = true;
151 for (int i=0; i<super_coll_list.getLength(); i++) {
152 Element super_coll = (Element)super_coll_list.item(i);
153 Element set_spec = (Element)GSXML.getChildByTagName(super_coll, OAIXML.SET_SPEC);
154 if (set_spec != null) {
155 String name = GSXML.getNodeText(set_spec);
156 if (!name.equals("")) {
[28885]157 this.super_coll_data.put(name, super_coll);
[28857]158 logger.error("adding in super coll "+name);
159 }
160 }
161 }
162
[28885]163 if (this.super_coll_data.size()==0) {
[28857]164 this.has_super_colls = false;
165 }
166 }
167 if (this.has_super_colls == true) {
168 this.super_coll_map = new HashMap<String, Vector<String>>();
169 }
[28885]170 return true;
171
172 }
173 private boolean configureSetInfo() {
[28857]174 this.set_set = new HashSet<String>();
175
[28885]176 // First, we get a list of all the OAI enabled collections
[28857]177 // We get this by sending a listSets request to the MR
[28966]178 Document doc = XMLConverter.newDOM();
[28857]179 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
180
181 Element request = GSXML.createBasicRequest(doc, OAIXML.OAI_SET_LIST, "", null);
182 message.appendChild(request);
183 Node msg_node = mr.process(message);
184
185 if (msg_node == null) {
186 logger.error("returned msg_node from mr is null");
187 return false;
188 }
189 Element resp = (Element)GSXML.getChildByTagName(msg_node, GSXML.RESPONSE_ELEM);
190 Element coll_list = (Element)GSXML.getChildByTagName(resp, GSXML.COLLECTION_ELEM + GSXML.LIST_MODIFIER);
191 if (coll_list == null) {
192 logger.error("coll_list is null");
193 return false;
194 }
195
[28879]196 this.collection_list = (Element)doc.importNode(coll_list, true);
197
198 // go through and store a list of collection names for convenience
[28987]199 // also create a 'to' attribute for the next request to the MR, which
200 // is a ListSets request to each collection
[28879]201 Node child = this.collection_list.getFirstChild();
202 if (child == null) {
203 logger.error("collection list has no children");
[28857]204 noRecordsMatch = true;
205 return false;
206 }
[28879]207
[28857]208 this.collection_name_list = new Vector<String>();
[28879]209 StringBuffer to = new StringBuffer();
210 boolean first = true;
211 while (child != null) {
212 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
213 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
214 this.collection_name_list.add(coll_id);
215 if (!first) {
216 to.append(',');
217 }
218 first = false;
219 to.append(coll_id+"/"+OAIXML.LIST_SETS);
220 }
221 child = child.getNextSibling();
222 }
223 if (first) {
224 // we haven't found any collections
225 logger.error("found no collection elements in collectionList");
226 noRecordsMatch = true;
227 return false;
228 }
[28966]229 Document listsets_doc = XMLConverter.newDOM();
[28857]230 Element listsets_element = listsets_doc.createElement(OAIXML.LIST_SETS);
231 this.listsets_response = getMessage(listsets_doc, listsets_element);
232
233 // Now, for each collection, get a list of all its sets
234 // might include subsets (classifiers) or super colls
235 // We'll reuse the first message, changing its type and to atts
236 request.setAttribute(GSXML.TYPE_ATT, "");
237 request.setAttribute(GSXML.TO_ATT, to.toString());
238 // send to MR
239 msg_node = mr.process(message);
[28987]240 logger.error(XMLConverter.getPrettyString(msg_node));
[28857]241 NodeList response_list = ((Element)msg_node).getElementsByTagName(GSXML.RESPONSE_ELEM);
242 for (int c=0; c<response_list.getLength(); c++) {
243 // for each collection's response
244 Element response = (Element)response_list.item(c);
245 String coll_name = GSPath.getFirstLink(response.getAttribute(GSXML.FROM_ATT));
246 logger.error("coll from response "+coll_name);
247 NodeList set_list = response.getElementsByTagName(OAIXML.SET);
248 for (int j=0; j<set_list.getLength(); j++) {
249 // now check if it a super collection
250 Element set = (Element)set_list.item(j);
251 String set_spec = GSXML.getNodeText((Element)GSXML.getChildByTagName(set, OAIXML.SET_SPEC));
252 logger.error("set spec = "+set_spec);
253 // this may change if we add site name back in
254 // setSpecs will be collname or collname:subset or supercollname
255 if (set_spec.indexOf(":")==-1 && ! set_spec.equals(coll_name)) {
256 // it must be a super coll spec
257 logger.error("found super coll, "+set_spec);
258 // check that it is a valid one from config
[28885]259 if (this.has_super_colls == true && this.super_coll_data.containsKey(set_spec)) {
[28857]260 Vector <String> subcolls = this.super_coll_map.get(set_spec);
261 if (subcolls == null) {
262 logger.error("its new!!");
263 // not in there yet
264 subcolls = new Vector<String>();
265 this.set_set.add(set_spec);
266 this.super_coll_map.put(set_spec, subcolls);
267 // the first time a supercoll is mentioned, add into the set list
[28987]268 logger.error("finding the set info "+XMLConverter.getPrettyString(this.super_coll_data.get(set_spec)));
[28885]269 listsets_element.appendChild(GSXML.duplicateWithNewName(listsets_doc, this.super_coll_data.get(set_spec), OAIXML.SET, true));
[28857]270 }
271 // add this collection to the list for the super coll
272 subcolls.add(coll_name);
273 }
274 } else { // its either the coll itself or a subcoll
275 // add in the set
276 listsets_element.appendChild(listsets_doc.importNode(set, true));
277 this.set_set.add(set_spec);
278 }
279 } // for each set in the collection
280 } // for each OAI enabled collection
281 return true;
282 }
283
[28885]284 protected void resetMessageRouter() {
285 // we just need to send a configure request to MR
[28966]286 Document doc = XMLConverter.newDOM();
[28885]287 Element mr_request_message = doc.createElement(GSXML.MESSAGE_ELEM);
288 Element mr_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_SYSTEM, "", null);
289 mr_request_message.appendChild(mr_request);
290
291 Element system = doc.createElement(GSXML.SYSTEM_ELEM);
292 mr_request.appendChild(system);
293 system.setAttribute(GSXML.TYPE_ATT, GSXML.SYSTEM_TYPE_CONFIGURE);
294
295 Element response = (Element) this.mr.process(mr_request_message);
[28987]296 logger.error("configure response = "+XMLConverter.getPrettyString(response));
[28885]297 }
[14211]298 /** process using strings - just calls process using Elements */
299 public String process(String xml_in) {
300
[28987]301 Node message_node = XMLConverter.getDOM(xml_in);
[16688]302 Node page = process(message_node);
[28987]303 return XMLConverter.getString(page);
[14211]304 }
[16688]305
[28857]306 //Compose a message/response element used to send back to the OAIServer servlet.
[14211]307 //This method is only used within OAIReceptionist
[28857]308 private Element getMessage(Document doc, Element e) {
309 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
310 Element response = doc.createElement(GSXML.RESPONSE_ELEM);
311 msg.appendChild(response);
312 response.appendChild(e);
[14211]313 return msg;
314 }
[28857]315
[14211]316 /** process - produce xml data in response to a request
317 * if something goes wrong, it returns null -
318 */
[16688]319 public Node process(Node message_node) {
[14211]320 logger.error("OAIReceptionist received request");
[16688]321
[28966]322 Element message = GSXML.nodeToElement(message_node);
[28987]323 logger.error(XMLConverter.getString(message));
[16688]324
[14211]325 // check that its a correct message tag
326 if (!message.getTagName().equals(GSXML.MESSAGE_ELEM)) {
327 logger.error(" Invalid message. GSDL message should start with <"+GSXML.MESSAGE_ELEM+">, instead it starts with:"+message.getTagName()+".");
[28857]328 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
[14211]329 }
330
331 // get the request out of the message - assume that there is only one
332 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
333 if (request == null) {
334 logger.error(" message had no request!");
[28857]335 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "Internal messaging error");
[14211]336 }
[28885]337
338 // special case, reset=true for reloading the MR and recept data
339 String reset = request.getAttribute("reset");
340 if (!reset.equals("")) {
341 resetMessageRouter();
342 configureSetInfo();
343 return OAIXML.createResetResponse(true);
344 }
345
346
[14211]347 //At this stage, the value of 'to' attribute of the request must be the 'verb'
348 //The only thing that the oai receptionist can be sure is that these verbs are valid, nothing else.
349 String verb = request.getAttribute(GSXML.TO_ATT);
350 if (verb.equals(OAIXML.IDENTIFY)) {
351 return doIdentify();
352 }
353 if (verb.equals(OAIXML.LIST_METADATA_FORMATS)) {
[28857]354 return doListMetadataFormats(request);
[14211]355 }
356 if (verb.equals(OAIXML.LIST_SETS)) {
[28857]357 // we have composed the list sets response on init
358 // Note this means that list sets never uses resumption tokens
359 return this.listsets_response;
[14211]360 }
361 if (verb.equals(OAIXML.GET_RECORD)) {
[28857]362 return doGetRecord(request);
[14211]363 }
364 if (verb.equals(OAIXML.LIST_IDENTIFIERS)) {
[28857]365 return doListIdentifiersOrRecords(request,OAIXML.LIST_IDENTIFIERS , OAIXML.HEADER);
[14211]366 }
367 if (verb.equals(OAIXML.LIST_RECORDS)) {
[28857]368 return doListIdentifiersOrRecords(request, OAIXML.LIST_RECORDS, OAIXML.RECORD);
[14211]369 }
[28857]370 // should never get here as verbs were checked in OAIServer
371 return OAIXML.createErrorMessage(OAIXML.BAD_VERB, "Unexpected things happened");
[14211]372
373 }
[28857]374
375
[14211]376 private int getResumeAfter() {
377 Element resume_after = (Element)GSXML.getChildByTagName(oai_config, OAIXML.RESUME_AFTER);
378 if(resume_after != null) return Integer.parseInt(GSXML.getNodeText(resume_after));
379 return -1;
380 }
[28857]381 private String getRepositoryIdentifier() {
382 Element ri = (Element)GSXML.getChildByTagName(oai_config, OAIXML.REPOSITORY_IDENTIFIER);
[27672]383 if (ri != null) {
384 return GSXML.getNodeText(ri);
385 }
386 return "";
387 }
[14211]388
[24440]389
[14211]390 /** if the param_map contains strings other than those in valid_strs, return false;
391 * otherwise true.
392 */
[28857]393 private boolean areAllParamsValid(HashMap<String, String> param_map, HashSet<String> valid_strs) {
[25635]394 ArrayList<String> param_list = new ArrayList<String>(param_map.keySet());
[14211]395 for(int i=0; i<param_list.size(); i++) {
[28857]396 logger.error("param, key = "+param_list.get(i)+", value = "+param_map.get(param_list.get(i)));
[25635]397 if (valid_strs.contains(param_list.get(i)) == false) {
[14211]398 return false;
399 }
400 }
401 return true;
402 }
[28857]403
404 private Element doListIdentifiersOrRecords(Element req, String verb, String record_type) {
405 // options: from, until, set, metadataPrefix, resumptionToken
[14211]406 // exceptions: badArgument, badResumptionToken, cannotDisseminateFormat, noRecordMatch, and noSetHierarchy
[25635]407 HashSet<String> valid_strs = new HashSet<String>();
[14211]408 valid_strs.add(OAIXML.FROM);
409 valid_strs.add(OAIXML.UNTIL);
410 valid_strs.add(OAIXML.SET);
411 valid_strs.add(OAIXML.METADATA_PREFIX);
412 valid_strs.add(OAIXML.RESUMPTION_TOKEN);
413
[28966]414 Document result_doc = XMLConverter.newDOM();
[28857]415 Element result_element = result_doc.createElement(verb);
416 boolean result_token_needed = false; // does this result need to include a
417 // resumption token
[23913]418
[28857]419 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
[14211]420
[28857]421 HashMap<String, String> param_map = GSXML.getParamMap(params);
[14211]422
[28857]423 // are all the params valid?
424 if (!areAllParamsValid(param_map, valid_strs)) {
425 logger.error("One of the params is invalid");
426 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "There was an invalid parameter");
427 // TODO, need to tell the user which one was invalid ??
428 }
[14211]429
[28857]430 // Do we have a resumption token??
431 String token = null;
432 String from = null;
433 String until = null;
434 boolean set_requested = false;
435 String set_spec_str = null;
436 String prefix_value = null;
437 int cursor = 0;
438 int current_cursor = 0;
439 String current_set = null;
[28881]440 long initial_time = 0;
[23913]441
[28857]442 int total_size = -1; // we are only going to set this in resumption
443 // token if it is easy to work out, i.e. not sending extra requests to
444 // MR just to calculate total size
[23913]445
[28857]446 if(param_map.containsKey(OAIXML.RESUMPTION_TOKEN)) {
447 // Is it an error to have other arguments? Do we need to check to make sure that resumptionToken is the only arg??
448 // validate resumptionToken
449 token = param_map.get(OAIXML.RESUMPTION_TOKEN);
450 logger.info("has resumptionToken " + token);
451 if(OAIResumptionToken.isValidToken(token) == false) {
452 logger.error("token is not valid");
453 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "");
[14211]454 }
[28857]455 result_token_needed = true; // we always need to send a token back if we have started with one. It may be empty if we are returning the end of the list
456 // initialise the request params from the stored token data
457 HashMap<String, String> token_data = OAIResumptionToken.getTokenData(token);
458 from = token_data.get(OAIXML.FROM);
459 until = token_data.get(OAIXML.UNTIL);
460 set_spec_str = token_data.get(OAIXML.SET);
461 if (set_spec_str != null) {
462 set_requested = true;
[14211]463 }
[28857]464 prefix_value = token_data.get(OAIXML.METADATA_PREFIX);
465 current_set = token_data.get(OAIResumptionToken.CURRENT_SET);
466 try {
467 cursor = Integer.parseInt(token_data.get(OAIXML.CURSOR));
468 cursor = cursor + resume_after; // increment cursor
469 current_cursor = Integer.parseInt(token_data.get(OAIResumptionToken.CURRENT_CURSOR));
[28881]470 initial_time = Long.parseLong(token_data.get(OAIResumptionToken.INITIAL_TIME));
[28857]471 } catch (NumberFormatException e) {
472 logger.error("tried to parse int from cursor data and failed");
473 }
[14211]474
[28881]475 // check that the collections/sets haven't changed since the token was issued
476 if (collectionsChangedSinceTime(set_spec_str, initial_time)) {
477 logger.error("one of the collections in set "+set_spec_str+" has changed since token issued. Expiring the token");
478 OAIResumptionToken.expireToken(token);
479 return OAIXML.createErrorMessage(OAIXML.BAD_RESUMPTION_TOKEN, "Repository data has changed since this token was issued. Resend original request");
480 }
[28857]481 }
482 else {
483 // no resumption token, lets check the other params
484 // there must be a metadataPrefix
485 if (!param_map.containsKey(OAIXML.METADATA_PREFIX)) {
486 logger.error("metadataPrefix param required");
487 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "metadataPrefix param required");
488 }
[14211]489
[28857]490 //if there are any date params, check they're of the right format
491 from = param_map.get(OAIXML.FROM);
492 if(from != null) {
[23913]493 Date from_date = OAIXML.getDate(from);
494 if(from_date == null) {
[28857]495 logger.error("invalid date: " + from);
496 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.FROM);
[23913]497 }
[28857]498 }
499 until = param_map.get(OAIXML.UNTIL);
500 if(until != null) {
501 Date until_date = OAIXML.getDate(until);
[23913]502 if(until_date == null) {
[28857]503 logger.error("invalid date: " + until);
504 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid format for "+ OAIXML.UNTIL);
[23913]505 }
[28857]506 }
507 if(from != null && until != null) { // check they are of the same date-time format (granularity)
[23913]508 if(from.length() != until.length()) {
[28857]509 logger.error("The request has different granularities (date-time formats) for the From and Until date parameters.");
510 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "The request has different granularities (date-time formats) for the From and Until date parameters.");
[23913]511 }
[14211]512 }
[28857]513
514 // check the set arg is a set we know about
515 set_requested = param_map.containsKey(OAIXML.SET);
516 set_spec_str = null;
517 if(set_requested == true) {
518 set_spec_str = param_map.get(OAIXML.SET);
519 if (!this.set_set.contains(set_spec_str)) {
520 // the set is not one we know about
521 logger.error("requested set is not found in this repository");
522 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "invalid set parameter");
523
524 }
[14211]525 }
[28857]526 // Is the metadataPrefix arg one this repository supports?
527 prefix_value = param_map.get(OAIXML.METADATA_PREFIX);
528 if (repositorySupportsMetadataPrefix(prefix_value) == false) {
529 logger.error("requested metadataPrefix is not found in OAIConfig.xml");
530 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "metadata format "+prefix_value+" not supported by this repository");
531 }
532
533 } // else no resumption token, check other params
[14211]534
[28857]535 // Whew. Now we have validated the params, we can work on doing the actual
536 // request
[23913]537
[24440]538
[28966]539 Document doc = XMLConverter.newDOM();
[28857]540 Element mr_msg = doc.createElement(GSXML.MESSAGE_ELEM);
541 Element mr_req = doc.createElement(GSXML.REQUEST_ELEM);
542 // TODO does this need a type???
543 mr_msg.appendChild(mr_req);
[24440]544
[28857]545 // copy in the from/until params if there
546 if (from != null) {
547 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.FROM, from));
[23913]548 }
[28857]549 if (until != null) {
550 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.UNTIL, until));
[23913]551 }
[28857]552 // add metadataPrefix
553 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.METADATA_PREFIX, prefix_value));
554
555 // do we have a set???
556 // if no set, we send to all collections in the collection list
557 // if super set, we send to all collections in super set list
558 // if a single collection, send to it
559 // if a subset, send to the collection
[28881]560 Vector<String> current_coll_list = getCollectionListForSet(set_spec_str);
[28857]561 boolean single_collection = false;
[28881]562 if (current_coll_list.size() == 1) {
563 single_collection = true;
[14211]564 }
[28881]565 if (set_spec_str != null && set_spec_str.indexOf(":") != -1) {
566 // we have a subset - add the set param back in
567 mr_req.appendChild(GSXML.createParameter(doc, OAIXML.SET, set_spec_str));
[28857]568 }
[14211]569
[28857]570 int num_collected_records = 0;
571 int start_point = current_cursor; // may not be 0 if we are using a resumption token
572 String resumption_collection = "";
573 boolean empty_result_token = false; // if we are sending the last part of a list, then the token value will be empty
574
575 // iterate through the list of collections and send the request to each
[24440]576
[28857]577 int start_coll=0;
578 if (current_set != null) {
579 // we are resuming a previous request, need to locate the first collection
580 for (int i=0; i<current_coll_list.size(); i++) {
581 if (current_set.equals(current_coll_list.get(i))) {
582 start_coll = i;
583 break;
584 }
585 }
[14211]586 }
587
[28857]588 for (int i=start_coll; i<current_coll_list.size(); i++) {
589 String current_coll = current_coll_list.get(i);
590 mr_req.setAttribute(GSXML.TO_ATT, current_coll+"/"+verb);
591
592 Element result = (Element)mr.process(mr_msg);
593 logger.error(verb+ " result for coll "+current_coll);
[28987]594 logger.error(XMLConverter.getPrettyString(result));
[28857]595 if (result == null) {
596 logger.info("message router returns null");
597 // do what??? carry on? fail??
598 return OAIXML.createErrorMessage("Internal service returns null", "");
599 }
600 Element res = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
[14211]601 if(res == null) {
602 logger.info("response element in xml_result is null");
[28857]603 return OAIXML.createErrorMessage("Internal service returns null", "");
[14211]604 }
[28857]605 NodeList record_list = res.getElementsByTagName(record_type);
606 int num_records = record_list.getLength();
607 if(num_records == 0) {
608 logger.info("message router returns 0 records for coll "+current_coll);
609 continue; // try the next collection
610 }
611 if (single_collection) {
612 total_size = num_records;
[14211]613 }
[28857]614 int records_to_add = (resume_after > 0 ? resume_after - num_collected_records : num_records);
615 if (records_to_add > (num_records-start_point)) {
616 records_to_add = num_records-start_point;
617 }
618 addRecordsToList(result_doc, result_element, record_list, start_point, records_to_add);
619 num_collected_records += records_to_add;
[14211]620
[28857]621 // do we need to stop here, and do we need to issue a resumption token?
622 if (resume_after > 0 && num_collected_records == resume_after) {
623 // we have finished collecting records at the moment.
624 // but are we conincidentally at the end? or are there more to go?
625 if (records_to_add < (num_records - start_point)) {
626 // we have added less than this collection had
627 start_point += records_to_add;
628 resumption_collection = current_coll;
629 result_token_needed = true;
630 }
631 else {
632 // we added all this collection had to offer
633 // is there another collection in the list??
634 if (i<current_coll_list.size()-1) {
635 result_token_needed = true;
636 start_point = 0;
637 resumption_collection = current_coll_list.get(i+1);
638 }
639 else {
640 // we have finished one collection and there are no more collection
641 // if we need to send a resumption token (in this case, only because we started with one, then it will be empty
642 logger.error("at end of list, need empty result token");
643 empty_result_token = true;
644 }
645 }
646 break;
647 }
648 start_point = 0; // only the first one will have start non-zero, if we
649 // have a resumption token
[14211]650
[28857]651 } // for each collection
[24440]652
[28857]653 if (num_collected_records ==0) {
654 // there were no matching results
655 return OAIXML.createErrorMessage(OAIXML.NO_RECORDS_MATCH, "");
656 }
657
658 if (num_collected_records < resume_after) {
659 // we have been through all collections, and there are no more
660 // if we need a result token - only because we started with one, so we need to send an empty one, then make sure everyone knows we are just sending an empty one
661 if (result_token_needed) {
662 empty_result_token = true;
663 }
664 }
665
666 if (result_token_needed) {
667 // we need a resumption token
668 if (empty_result_token) {
669 logger.error("have empty result token");
670 token = "";
[14211]671 } else {
[28857]672 if (token != null) {
673 // we had a token for this request, we can just update it
674 token = OAIResumptionToken.updateToken(token, ""+cursor, resumption_collection, ""+start_point);
675 } else {
676 // we are generating a new one
677 token = OAIResumptionToken.createAndStoreResumptionToken(set_spec_str, prefix_value, from, until, ""+cursor, resumption_collection, ""+start_point );
678 }
679 }
[14211]680
[28857]681 // result token XML
682 long expiration_date = -1;
683 if (empty_result_token) {
684 // we know how many records in total as we have sent them all
685 total_size = cursor+num_collected_records;
686 } else {
687 // non-empty token, set the expiration date
688 expiration_date = OAIResumptionToken.getExpirationDate(token);
689 }
690 Element token_elem = OAIXML.createResumptionTokenElement(result_doc, token, total_size, cursor, expiration_date);
691 // OAIXML.addToken(token_elem); // store it
692 result_element.appendChild(token_elem); // add to the result
693 }
694
695
696 return getMessage(result_doc, result_element);
697 }
698
[28881]699 private Vector<String> getCollectionListForSet(String set) {
700 if (set == null) {
701 // no set requested, need the complete collection list
702 return this.collection_name_list;
703 }
704 if (has_super_colls && super_coll_map.containsKey(set)) {
705 return super_coll_map.get(set);
706 }
[28882]707
[28881]708 Vector<String> coll_list = new Vector<String>();
709 if (set.indexOf(":") != -1) {
710 String col_name = set.substring(0, set.indexOf(":"));
711 coll_list.add(col_name);
712 }
713 else {
714 coll_list.add(set);
715 }
716 return coll_list;
717 }
[28857]718 private void addRecordsToList(Document doc, Element result_element, NodeList
719 record_list, int start_point, int num_records) {
720 int end_point = start_point + num_records;
721 for (int i=start_point; i<end_point; i++) {
722 result_element.appendChild(doc.importNode(record_list.item(i), true));
723 }
724 }
725
[14211]726 private Element collectAll(Element result, Element msg, String verb, String elem_name) {
727 if(result == null) {
728 //in the first round, result is null
729 return msg;
730 }
[28857]731 Element res_in_result = (Element)GSXML.getChildByTagName(result, GSXML.RESPONSE_ELEM);
[23901]732 if(res_in_result == null) { // return the results of all other collections accumulated so far
[28857]733 return msg;
[23901]734 }
[14211]735 Element verb_elem = (Element)GSXML.getChildByTagName(res_in_result, verb);
736 if(msg == null) {
737 return result;
738 }
739
740 //e.g., get all <record> elements from the returned message. There may be none of
741 //such element, for example, the collection service returned an error message
742 NodeList elem_list = msg.getElementsByTagName(elem_name);
743
744 for (int i=0; i<elem_list.getLength(); i++) {
745 verb_elem.appendChild(res_in_result.getOwnerDocument().importNode(elem_list.item(i), true));
746 }
747 return result;
748 }
[28857]749
750
751 /** there are three possible exception conditions: bad argument, idDoesNotExist, and noMetadataFormat.
752 * The first one is handled here, and the last two are processed by OAIPMH.
753 */
754 private Element doListMetadataFormats(Element req) {
[14211]755 //if the verb is ListMetadataFormats, there could be only one parameter: identifier
756 //, or there is no parameter; otherwise it is an error
[28987]757 //logger.info("" + XMLConverter.getString(msg));
[14211]758
[28857]759 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
[14211]760 Element param = null;
[28966]761 Document lmf_doc = XMLConverter.newDOM();
[14211]762 if(params.getLength() == 0) {
763 //this is requesting metadata formats for the whole repository
764 //read the oaiConfig.xml file, return the metadata formats specified there.
[28857]765 if (this.listmetadataformats_response != null) {
766 // we have already created it
767 return this.listmetadataformats_response;
768 }
769
770 Element list_metadata_formats = lmf_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
[28987]771 // get all the formats out of oai_config
772 NodeList formats = oai_config.getElementsByTagName(OAIXML.METADATA_FORMAT);
773 if (formats.getLength() ==0) {
774 logger.error("OAIConfig.xml must contain the supported metadata formats");
775 // TODO this is internal error, what to do???
776 return getMessage(lmf_doc, list_metadata_formats);
[27672]777 }
[28987]778
[27672]779 for(int i=0; i<formats.getLength(); i++) {
[29066]780 Element f = OAIXML.getMetadataFormatShort(lmf_doc, (Element)formats.item(i));
781 list_metadata_formats.appendChild(f);
782 }
[28987]783 this.listmetadataformats_response = getMessage(lmf_doc, list_metadata_formats);
784 return this.listmetadataformats_response;
[14211]785
[27672]786 }
787
788 if (params.getLength() > 1) {
[14211]789 //Bad argument. Can't be more than one parameters for ListMetadataFormats verb
[28857]790 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
[27672]791 }
792
793 // This is a request for the metadata of a particular item with an identifier
[28857]794 /**the request xml is in the form: <request>
795 * <param name=.../>
796 * </request>
797 *And there is a param element and one element only. (No paramList element in between).
798 */
799 param = (Element)params.item(0);
800 String param_name = param.getAttribute(GSXML.NAME_ATT);
801 String identifier = "";
802 if (!param_name.equals(OAIXML.IDENTIFIER)) {
803 //Bad argument
804 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
805 }
806
807 identifier = param.getAttribute(GSXML.VALUE_ATT);
808 // the identifier is in the form: <coll_name>:<OID>
[28987]809 // so it must contain at least one ':' characters
810 // (the oid itself may contain : chars)
811 String[] strs = identifier.split(":", 2);
812 if(strs.length != 2) {
[28857]813 logger.error("identifier is not in the form coll:id" + identifier);
814 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
815 }
[14211]816
[28857]817 // send request to message router
818 // get the names
819 String coll_name = strs[0];
820 String oid = strs[1];
[14211]821
[28987]822 Document msg_doc = XMLConverter.newDOM();
823 Element message = msg_doc.createElement(GSXML.MESSAGE_ELEM);
[28857]824 String verb = req.getAttribute(GSXML.TO_ATT);
[28987]825 String new_to = coll_name + "/" + verb;
826 Element request = GSXML.createBasicRequest(msg_doc, "oai???", new_to, null);
827 message.appendChild(request);
828 // add the id param
829 GSXML.addParameterToList(request, OAIXML.OID, oid);
[14211]830
[28857]831 //Now send the request to the message router to process
832 Node result_node = mr.process(message);
[28966]833 return GSXML.nodeToElement(result_node);
[28857]834 }
[27672]835
[14211]836
[27672]837
838
839 private void copyNamedElementfromConfig(Element to_elem, String element_name) {
840 Element original_element = (Element)GSXML.getChildByTagName(oai_config, element_name);
841 if(original_element != null) {
[28987]842 GSXML.copyNode(to_elem, original_element);
[14211]843 }
844 }
[27672]845
846
[14211]847 private Element doIdentify() {
848 //The validation for this verb has been done in OAIServer.validate(). So no bother here.
849 logger.info("");
[27672]850 if (this.identify_response != null) {
851 // we have already created it
[28885]852 return getMessage(this.identify_response.getOwnerDocument(), this.identify_response);
[27672]853 }
[28966]854 Document doc = XMLConverter.newDOM();
[28857]855 Element identify = doc.createElement(OAIXML.IDENTIFY);
[14211]856 //do the repository name
[27672]857 copyNamedElementfromConfig(identify, OAIXML.REPOSITORY_NAME);
[14211]858 //do the baseurl
[27672]859 copyNamedElementfromConfig(identify, OAIXML.BASE_URL);
[14211]860 //do the protocol version
[27672]861 copyNamedElementfromConfig(identify, OAIXML.PROTOCOL_VERSION);
[23944]862
[14211]863 //There can be more than one admin email according to the OAI specification
864 NodeList admin_emails = GSXML.getChildrenByTagName(oai_config, OAIXML.ADMIN_EMAIL);
865 int num_admin = 0;
866 Element from_admin_email = null;
867 if (admin_emails != null) {
868 num_admin = admin_emails.getLength();
869 }
870 for (int i=0; i<num_admin; i++) {
[28987]871 GSXML.copyNode(identify, admin_emails.item(i));
[14211]872 }
873
874 //do the earliestDatestamp
875 //send request to mr to search through the earliest datestamp amongst all oai collections in the repository.
876 //ask the message router for a list of oai collections
[27672]877 //NodeList oai_coll = getOAICollectionList();
878 long earliestDatestamp = getEarliestDateStamp(collection_list);
[23913]879 String earliestDatestamp_str = OAIXML.getTime(earliestDatestamp);
[28857]880 Element earliestDatestamp_elem = doc.createElement(OAIXML.EARLIEST_DATESTAMP);
[14211]881 GSXML.setNodeText(earliestDatestamp_elem, earliestDatestamp_str);
882 identify.appendChild(earliestDatestamp_elem);
[23944]883
884 //do the deletedRecord
[27672]885 copyNamedElementfromConfig(identify, OAIXML.DELETED_RECORD);
[23944]886 //do the granularity
[27672]887 copyNamedElementfromConfig(identify, OAIXML.GRANULARITY);
888
889 // output the oai identifier
[28857]890 Element description = doc.createElement(OAIXML.DESCRIPTION);
[27672]891 identify.appendChild(description);
[28857]892 // TODO, make this a valid id
893 Element oaiIdentifier = OAIXML.createOAIIdentifierXML(doc, repository_id, "lucene-jdbm-demo", "ec159e");
[27672]894 description.appendChild(oaiIdentifier);
895
896 // if there are any oaiInfo metadata, add them in too.
897 Element info = (Element)GSXML.getChildByTagName(oai_config, OAIXML.OAI_INFO);
898 if (info != null) {
[28857]899 NodeList meta = GSXML.getChildrenByTagName(info, OAIXML.METADATA);
[27672]900 if (meta != null && meta.getLength() > 0) {
[28857]901 Element gsdl = OAIXML.createGSDLElement(doc);
[27672]902 description.appendChild(gsdl);
903 for (int m = 0; m<meta.getLength(); m++) {
[28987]904 GSXML.copyNode(gsdl, meta.item(m));
[27672]905 }
906
907 }
908 }
909 this.identify_response = identify;
[28857]910 return getMessage(doc, identify);
[14211]911 }
[28857]912 /** split the identifier into <collection + OID> as an array
913 It has already been checked that the 'identifier' contains at least one ':'
914 */
[28987]915 // private String[] splitNames(String identifier) {
916 // logger.info(identifier);
917 // String [] strs = new String[2];
918 // int first_colon = identifier.indexOf(":");
919 // if(first_colon == -1) {
920 // return null;
921 // }
922 // strs[0] = identifier.substring(0, first_colon);
923 // strs[1] = identifier.substring(first_colon + 1);
924 // return strs;
925 // }
[14211]926 /** validate if the specified metadata prefix value is supported by the repository
927 * by checking it in the OAIConfig.xml
928 */
[28857]929 private boolean repositorySupportsMetadataPrefix(String prefix_value) {
[14211]930 NodeList prefix_list = oai_config.getElementsByTagName(OAIXML.METADATA_PREFIX);
931
932 for(int i=0; i<prefix_list.getLength(); i++) {
933 if(prefix_value.equals(GSXML.getNodeText((Element)prefix_list.item(i)).trim() )) {
934 return true;
935 }
936 }
937 return false;
938 }
[28857]939 private Element doGetRecord(Element req){
[14211]940 logger.info("");
941 /** arguments:
942 identifier: required
943 metadataPrefix: required
[28857]944 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
945 */
[28966]946 Document doc = XMLConverter.newDOM();
[28857]947 Element get_record = doc.createElement(OAIXML.GET_RECORD);
[14211]948
[25635]949 HashSet<String> valid_strs = new HashSet<String>();
[14211]950 valid_strs.add(OAIXML.IDENTIFIER);
951 valid_strs.add(OAIXML.METADATA_PREFIX);
952
[28857]953 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
954 HashMap<String, String> param_map = GSXML.getParamMap(params);
[14211]955
[28857]956 if(!areAllParamsValid(param_map, valid_strs) ||
957 params.getLength() == 0 ||
958 param_map.containsKey(OAIXML.IDENTIFIER) == false ||
959 param_map.containsKey(OAIXML.METADATA_PREFIX) == false ) {
[14211]960 logger.error("must have the metadataPrefix/identifier parameter.");
[28857]961 return OAIXML.createErrorMessage(OAIXML.BAD_ARGUMENT, "");
[14211]962 }
963
[25635]964 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
965 String identifier = param_map.get(OAIXML.IDENTIFIER);
[14211]966
967 // verify the metadata prefix
[28857]968 if (repositorySupportsMetadataPrefix(prefix) == false) {
[14211]969 logger.error("requested prefix is not found in OAIConfig.xml");
[28857]970 return OAIXML.createErrorMessage(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
[14211]971 }
972
973 // get the names
[28987]974 String[] strs = identifier.split(":", 2);
[28857]975 if(strs == null || strs.length < 2) {
976 logger.error("identifier is not in the form coll:id" + identifier);
977 return OAIXML.createErrorMessage(OAIXML.ID_DOES_NOT_EXIST, "");
[14211]978 }
[28857]979 //String name_of_site = strs[0];
980 String coll_name = strs[0];
981 String oid = strs[1];
[14211]982
983 //re-organize the request element
984 // reset the 'to' attribute
[28857]985 String verb = req.getAttribute(GSXML.TO_ATT);
986 req.setAttribute(GSXML.TO_ATT, coll_name + "/" + verb);
[14211]987 // reset the identifier element
[28857]988 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.IDENTIFIER);
[14211]989 if (param != null) {
[28857]990 param.setAttribute(GSXML.NAME_ATT, OAIXML.OID);
991 param.setAttribute(GSXML.VALUE_ATT, oid);
[14211]992 }
993
994 //Now send the request to the message router to process
[28857]995 Element msg = doc.createElement(GSXML.MESSAGE_ELEM);
996 msg.appendChild(doc.importNode(req, true));
[16688]997 Node result_node = mr.process(msg);
[28966]998 return GSXML.nodeToElement(result_node);
[14211]999 }
[23913]1000
[28857]1001 // See OAIConfig.xml
1002 // dynamically works out what the earliestDateStamp is, since it varies by collection
1003 // returns this time in *milliseconds*.
[28879]1004 protected long getEarliestDateStamp(Element oai_coll_list) {
1005 // config earliest datstamp
1006 long config_datestamp = 0;
1007 Element config_datestamp_elem = (Element)GSXML.getChildByTagName(this.oai_config, OAIXML.EARLIEST_DATESTAMP);
1008 if (config_datestamp_elem != null) {
1009 String datest = GSXML.getNodeText(config_datestamp_elem);
1010 config_datestamp = OAIXML.getTime(datest);
1011 if (config_datestamp == -1) {
1012 config_datestamp = 0;
1013 }
1014 }
[28857]1015 //do the earliestDatestamp
[28879]1016 long current_time = System.currentTimeMillis();
1017 long earliestDatestamp = current_time;
1018 NodeList oai_coll = oai_coll_list.getElementsByTagName(GSXML.COLLECTION_ELEM);
[28857]1019 int oai_coll_size = oai_coll.getLength();
1020 if (oai_coll_size == 0) {
[28879]1021 logger.info("returned oai collection list is empty. Setting repository earliestDatestamp to be the earliest datestamp from OAIConfig.xml, or 1970-01-01 if not specified.");
1022 return config_datestamp;
[28857]1023 }
1024 // the earliestDatestamp is now stored as a metadata element in the collection's buildConfig.xml file
1025 // we get the earliestDatestamp among the collections
1026 for(int i=0; i<oai_coll_size; i++) {
1027 long coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.EARLIEST_DATESTAMP));
[28879]1028 if (coll_earliestDatestamp == 0) {
1029 // try last modified
1030 coll_earliestDatestamp = Long.parseLong(((Element)oai_coll.item(i)).getAttribute(OAIXML.LAST_MODIFIED));
1031 }
1032 if (coll_earliestDatestamp > 0) {
1033 earliestDatestamp = (earliestDatestamp > coll_earliestDatestamp)? coll_earliestDatestamp : earliestDatestamp;
1034 }
[28857]1035 }
[28879]1036 if (earliestDatestamp == current_time) {
1037 logger.info("no collection had a real datestamp, using value from OAIConfig");
1038 return config_datestamp;
1039 }
1040 return earliestDatestamp;
[28857]1041 }
[28881]1042
1043 private boolean collectionsChangedSinceTime(String set_spec_str, long initial_time) {
1044
1045 // we need to look though all collections in the set to see if any have last modified dates > initial_time
1046 Vector<String> set_coll_list = getCollectionListForSet(set_spec_str);
1047
1048 Node child = this.collection_list.getFirstChild();
1049 while (child != null) {
1050 if (child.getNodeName().equals(GSXML.COLLECTION_ELEM)) {
1051 String coll_id =((Element) child).getAttribute(GSXML.NAME_ATT);
1052 if (set_coll_list.contains(coll_id)) {
1053 long last_modified = Long.parseLong(((Element)child).getAttribute(OAIXML.LAST_MODIFIED));
1054 if (initial_time < last_modified) {
1055 return true;
1056 }
1057 }
1058 }
1059 child = child.getNextSibling();
1060 }
1061 return false;
1062
1063 }
1064
[14211]1065}
[28857]1066
1067
Note: See TracBrowser for help on using the repository browser.