source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/util/OAIXML.java@ 28871

Last change on this file since 28871 was 28871, checked in by kjdon, 10 years ago

added a comment

File size: 22.3 KB
Line 
1/*
2 * OAIXML.java
3 * Copyright (C) 2008 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.util;
20
21import org.greenstone.util.GlobalProperties;
22
23import org.w3c.dom.*;
24
25import java.io.*;
26import java.net.*;
27import java.util.*;
28import java.text.DateFormat;
29import java.text.SimpleDateFormat;
30
31// import file Logger.java
32import org.apache.log4j.*;
33
34/** these constants are used for the OAI service */
35public class OAIXML {
36
37 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.GSXML.class.getName());
38
// the leading keyword of the oai protocol (name of the request parameter carrying the verb)
public static final String VERB = "verb";

// the six valid oai verbs
public static final String GET_RECORD = "GetRecord";
public static final String LIST_RECORDS = "ListRecords";
public static final String LIST_IDENTIFIERS = "ListIdentifiers";
public static final String LIST_SETS = "ListSets";
public static final String LIST_METADATA_FORMATS = "ListMetadataFormats";
public static final String IDENTIFY = "Identify";

// oai request parameters
public static final String METADATA_PREFIX = "metadataPrefix";
public static final String FROM = "from";
public static final String UNTIL = "until";
public static final String SET = "set";
public static final String RESUMPTION_TOKEN = "resumptionToken";
public static final String IDENTIFIER = "identifier";

// Error element and its code attribute
public static final String ERROR = "error";
public static final String CODE = "code";

// OAI error codes
public static final String BAD_ARGUMENT = "badArgument";
public static final String BAD_RESUMPTION_TOKEN = "badResumptionToken";
public static final String BAD_VERB = "badVerb";
public static final String CANNOT_DISSEMINATE_FORMAT = "cannotDisseminateFormat";
public static final String ID_DOES_NOT_EXIST = "idDoesNotExist";
public static final String NO_METADATA_FORMATS = "noMetadataFormats";
public static final String NO_RECORDS_MATCH = "noRecordsMatch";
public static final String NO_SET_HIERARCHY = "noSetHierarchy";


// words used to compose oai responses
// many of these are used in OAIConfig too

// General
public static final String OAI_PMH = "OAI-PMH";
public static final String RESPONSE_DATE = "responseDate";
public static final String REQUEST = "request";

// Identify data
public static final String ADMIN_EMAIL = "adminEmail";
public static final String BASE_URL = "baseURL";
public static final String COMPRESSION = "compression";
public static final String DELETED_RECORD = "deletedRecord";
public static final String DESCRIPTION = "description";
public static final String EARLIEST_DATESTAMP = "earliestDatestamp";
public static final String GRANULARITY = "granularity";
public static final String PROTOCOL_VERSION = "protocolVersion";
public static final String REPOSITORY_NAME = "repositoryName";
public static final String OAI_IDENTIFIER = "oai-identifier";
public static final String SCHEME = "scheme";
public static final String REPOSITORY_IDENTIFIER = "repositoryIdentifier";
public static final String DELIMITER = "delimiter";
public static final String SAMPLE_IDENTIFIER = "sampleIdentifier";

// metadata formats
public static final String METADATA_FORMAT = "metadataFormat";
public static final String SCHEMA = "schema";
public static final String METADATA_NAMESPACE = "metadataNamespace";
public static final String OAI_DC = "oai_dc";
public static final String DC = "dc";

// record response data
// (SET_SPEC, declared under "list sets" below, is also used here)
public static final String RECORD = "record";
public static final String HEADER = "header";
public static final String DATESTAMP = "datestamp";
public static final String METADATA = "metadata";

// list sets
// (SET, declared with the request parameters above, is also used here)
public static final String SET_NAME = "setName";
public static final String SET_SPEC = "setSpec";
public static final String SET_DESCRIPTION = "setDescription";

// resumption token element and its attributes
public static final String RESUMPTION_TOKEN_ELEM = "resumptionToken";
public static final String EXPIRATION_DATE = "expirationDate";
public static final String COMPLETE_LIST_SIZE = "completeListSize";
public static final String CURSOR = "cursor";

// extra elements/attributes from OAIConfig
public static final String OAI_INFO = "oaiInfo";
public static final String USE_STYLESHEET = "useOAIStylesheet";
public static final String STYLESHEET = "OAIStylesheet";
public static final String RESUME_AFTER = "resumeAfter";
public static final String RESUMPTION_TOKEN_EXPIRATION = "resumptionTokenExpiration";
public static final String OAI_SUPER_SET = "oaiSuperSet";
public static final String MAPPING = "mapping";
public static final String MAPPING_LIST = "mappingList";

// code constants
public static final String GS_OAI_RESOURCE_URL = "gs.OAIResourceURL";
public static final String ILLEGAL_OAI_VERB = "Illegal OAI verb";
public static final String LASTMODIFIED = "lastmodified";
// The node id in the collection database, which contains all the OIDs in the database
public static final String BROWSELIST = "browselist";
public static final String OAI_LASTMODIFIED = "oailastmodified";
public static final String OAIPMH = "OAIPMH";
public static final String OAI_SET_LIST = "oaiSetList";
public static final String OAI_SERVICE_UNAVAILABLE = "OAI service unavailable";
public static final String OID = "OID";

// system-dependent file separator, may be '/' or '\'
public static final String FILE_SEPARATOR = File.separator;
public static final String OAI_VERSION1 = "1.0";
public static final String OAI_VERSION2 = "2.0";
/*************************above are final values****************************/
150
151
// root element of OAIConfig.xml; initialized in getOAIConfigXML(), null until then
public static Element oai_config_elem = null;

// Stores the date format used for datestamps (the repository's granularity).
// Empty until getOAIConfigXML() runs; after that it holds the configured value
// rewritten as a SimpleDateFormat pattern, e.g. "yyyy-MM-dd'T'HH:mm:ss'Z'".
public static String granularity = "";

// http://www.openarchives.org/OAI/openarchivesprotocol.html#DatestampsRequests
// specifies that all repositories must support YYYY-MM-DD (yyyy-MM-dd in Java).
// This is in addition to the (optional) finer granularity above that
// a repository may additionally choose to support.
public static final String default_granularity = "yyyy-MM-dd";

// resumption token lifetime; getTokenExpiration() multiplies this by 1000 to
// obtain milliseconds, so the unit here is seconds. May be overridden from
// OAIConfig.xml in getOAIConfigXML().
public static long token_expiration = 7200;
/** which version of oai that this oaiserver supports; default is 2.0
 * initialized in getOAIConfigXML()
 */
public static String oai_version = "2.0";
// text of the baseURL element of OAIConfig.xml; initialized in getOAIConfigXML()
public static String baseURL = "";

/** Converter for parsing files and creating Elements */
public static XMLConverter converter = new XMLConverter();

// Parallel arrays: special_char[i] is written as escape_sequence[i] by
// oaiEncode() and restored by oaiDecode().
public static String[] special_char = {"/", "?", "#", "=", "&", ":", ";", " ", "%", "+"};
public static String[] escape_sequence = {"%2F", "%3F", "%23", "%3D", "%26", "%3A", "%3B", "%20", "%25", "%2B"};
177
178 public static String getOAIVersion() {
179 return oai_version;
180 }
181
182 public static String getBaseURL() {
183 return baseURL;
184 }
185
186 /** Read in OAIConfig.xml (residing web/WEB-INF/classes/) and use it to configure the receptionist etc.
187 * the oai_version and baseURL variables are also set in here.
188 * The init() method is also called in here. */
189 public static Element getOAIConfigXML() {
190
191 File oai_config_file = null;
192
193 try {
194 URL oai_config_url = Class.forName("org.greenstone.gsdl3.OAIServer").getClassLoader().getResource("OAIConfig.xml");
195 if (oai_config_url == null) {
196 logger.error("couldn't find OAIConfig.xml via class loader");
197 return null;
198 }
199 oai_config_file = new File(oai_config_url.toURI());
200 if (!oai_config_file.exists()) {
201 logger.error(" oai config file: "+oai_config_file.getPath()+" not found!");
202 return null;
203 }
204 } catch(Exception e) {
205 logger.error("couldn't find OAIConfig.xml "+e.getMessage());
206 return null;
207 }
208
209 Document oai_config_doc = converter.getDOM(oai_config_file, "utf-8");
210 if (oai_config_doc != null) {
211 oai_config_elem = oai_config_doc.getDocumentElement();
212 } else {
213 logger.error("Failed to parse oai config file OAIConfig.xml.");
214 return null;
215 }
216
217 //initialize oai_version
218 Element protocol_version = (Element)GSXML.getChildByTagName(oai_config_elem, PROTOCOL_VERSION);
219 oai_version = GSXML.getNodeText(protocol_version).trim();
220
221 // initialize baseURL
222 Element base_url_elem = (Element)GSXML.getChildByTagName(oai_config_elem, BASE_URL);
223 baseURL = GSXML.getNodeText(base_url_elem);
224
225 //initialize token_expiration
226 Element expiration = (Element)GSXML.getChildByTagName(oai_config_elem, RESUMPTION_TOKEN_EXPIRATION);
227 String expire_str = GSXML.getNodeText(expiration).trim();
228 if (expiration != null && !expire_str.equals("")) {
229 token_expiration = Long.parseLong(expire_str);
230 }
231
232 // read granularity from the config file
233 Element granu_elem = (Element)GSXML.getChildByTagName(oai_config_elem, GRANULARITY);
234 //initialize the granu_str which might be used by other methods (eg, getDate())
235 granularity = GSXML.getNodeText(granu_elem).trim();
236
237 //change "yyyy-MM-ddTHH:mm:ssZ" to "yyyy-MM-dd'T'HH:mm:ss'Z'"
238 granularity = granularity.replaceAll("T", "'T'");
239 granularity = granularity.replaceAll("Z", "'Z'");
240 granularity = granularity.replaceAll("YYYY", "yyyy").replaceAll("DD", "dd").replaceAll("hh", "HH");
241 return oai_config_elem;
242 }
243
244 public static String[] getMetadataMapping(Element metadata_format) {
245
246 if (metadata_format == null) {
247 return null;
248 }
249 NodeList mappings = metadata_format.getElementsByTagName(MAPPING);
250 int size = mappings.getLength();
251 if (size == 0) {
252 logger.info("No metadata mappings are provided in OAIConfig.xml.");
253 return null;
254 }
255 String[] names = new String[size];
256 for (int i=0; i<size; i++) {
257 names[i] = GSXML.getNodeText((Element)mappings.item(i)).trim();
258 }
259 return names;
260
261 }
262
263 public static String[] getGlobalMetadataMapping(String prefix) {
264 Element list_meta_formats = (Element)GSXML.getChildByTagName(oai_config_elem, LIST_METADATA_FORMATS);
265 if(list_meta_formats == null) {
266 return null;
267 }
268 Element metadata_format = GSXML.getNamedElement(list_meta_formats, METADATA_FORMAT, METADATA_PREFIX, prefix);
269 if(metadata_format == null) {
270 return null;
271 }
272 return getMetadataMapping(metadata_format);
273 }
274
275
276 public static long getTokenExpiration() {
277 return token_expiration*1000; // in milliseconds
278 }
279
280 /** TODO: returns a basic response for appropriate oai version
281 *
282 */
283 public static Element createBasicResponse(Document doc, String verb, String[] pairs) {
284
285 Element response = createResponseHeader(doc, verb);
286
287 //set the responseDate and request elements accordingly
288 Element request_elem = (Element)GSXML.getChildByTagName(response, REQUEST);
289 if (verb.equals("")) {
290 request_elem.setAttribute(VERB, verb);
291 }
292 int num_pairs = (pairs==null)? 0 : pairs.length;
293 for (int i=num_pairs - 1; i>=0; i--) {
294 int index = pairs[i].indexOf("=");
295 if (index != -1) {
296 String[] strs = pairs[i].split("=");
297 if(strs != null && strs.length == 2) {
298 request_elem.setAttribute(strs[0], oaiDecode(strs[1]));
299 }
300 }
301 }//end of for()
302
303 GSXML.setNodeText(request_elem, baseURL);
304
305 Node resp_date = GSXML.getChildByTagName(response, RESPONSE_DATE);
306 if (resp_date != null) {
307 GSXML.setNodeText((Element)resp_date, getCurrentUTCTime());
308 }
309
310 return response;
311 }
312 /** @param error_code the value of the code attribute
313 * @param error_text the node text of the error element
314 * @return an oai error <message><response><error>
315 */
316 public static Element createErrorMessage(String error_code, String error_text) {
317 Document doc = converter.newDOM();
318 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
319 Element resp = doc.createElement(GSXML.RESPONSE_ELEM);
320 message.appendChild(resp);
321 Element error = createErrorElement(doc, error_code, error_text);
322 resp.appendChild(error);
323 return message;
324 }
325
326 /** @param error_code the value of the code attribute
327 * @param error_text the node text of the error element
328 * @return an oai error <response><error>
329 */
330 public static Element createErrorResponse(String error_code, String error_text) {
331 Document doc = converter.newDOM();
332 Element resp = doc.createElement(GSXML.RESPONSE_ELEM);
333 Element error = createErrorElement(doc, error_code, error_text);
334 resp.appendChild(error);
335 return resp;
336 }
337
338 /** @param error_code the value of the code attribute
339 * @param error_text the node text of the error element
340 * @return an oai error <error>
341 */
342 public static Element createErrorElement(Document doc, String error_code, String error_text) {
343 Element error = doc.createElement(ERROR);
344 error.setAttribute(CODE, error_code);
345 GSXML.setNodeText(error, error_text);
346 return error;
347 }
348
349 /** convert the escaped sequences (eg, '%3A') of those special characters back to their
350 * original form (eg, ':').
351 */
352 public static String oaiDecode(String escaped_str) {
353 logger.info("oaiDecode() " +escaped_str);
354 for (int i=0; i<special_char.length; i++) {
355 if (escaped_str.indexOf(escape_sequence[i]) != -1) {
356 escaped_str = escaped_str.replaceAll(escape_sequence[i], special_char[i]);
357 }
358 }
359 return escaped_str;
360 }
361 /** convert those special characters (eg, ':') to their
362 * escaped sequences (eg, '%3A').
363 */
364 public static String oaiEncode(String original_str) {
365 logger.info("oaiEncode() " + original_str);
366 for (int i=0; i<special_char.length; i++) {
367 if (original_str.indexOf(special_char[i]) != -1) {
368 original_str = original_str.replaceAll(special_char[i], escape_sequence[i]);
369 }
370 }
371 return original_str;
372 }
373 /** convert YYYY-MM_DDThh:mm:ssZ to yyyy-MM-ddTHH:mm:ssZ
374 */
375 public static String convertToJava(String oai_format) {
376 oai_format = oai_format.replaceAll("YYYY", "yyyy").replaceAll("DD", "dd").replaceAll("hh", "HH");
377 return oai_format;
378 }
379 /** convert yyyy-MM-ddTHH:mm:ssZ to YYYY-MM_DDThh:mm:ssZ
380 */
381 public static String convertToOAI(String java_format) {
382 java_format = java_format.replaceAll("yyyy", "YYYY").replaceAll("dd", "DD").replaceAll("HH", "hh");
383 return java_format;
384 }
385 public static String getCurrentUTCTime() {
386 Date current_utc = new Date(System.currentTimeMillis());
387 //granularity is in the form: yyyy-MM-dd'T'HH:mm:ss'Z '
388 DateFormat formatter = new SimpleDateFormat(granularity);
389 return formatter.format(current_utc);
390 }
391 /** get a Date object from a Date format pattern string
392 *
393 * @param pattern - in the form: 2007-06-14T16:48:25Z, for example.
394 * @return a Date object - null if the pattern is not in the specified form
395 */
396
397 public static Date getDate(String pattern) {
398 if (pattern == null || pattern.equals("")) {
399 return null;
400 }
401 Date date = null;
402 // String str = pattern.replaceAll("T", " ");
403 // str = str.replaceAll("Z", "");
404 SimpleDateFormat sdf = null;
405 try {
406 sdf = new SimpleDateFormat(granularity);
407 date = sdf.parse(pattern);
408 } catch(Exception e) {
409 if(!default_granularity.equals(granularity)) { // try validating against default granularity
410 try {
411 date = null;
412 sdf = null;
413 sdf = new SimpleDateFormat(default_granularity);
414 date = sdf.parse(pattern);
415 } catch(Exception ex) {
416 logger.error("invalid date format: " + pattern);
417 return null;
418 }
419 } else {
420 logger.error("invalid date format: " + pattern);
421 return null;
422 }
423 }
424 return date;
425 }
426 /** get the million second value from a string representing time in a pattern
427 * (eg, 2007-06-14T16:48:25Z)
428 */
429 public static long getTime(String pattern) {
430 if (pattern == null || pattern.equals("")) {
431 return -1;
432 }
433 Date date = null;
434 SimpleDateFormat sdf = null;
435 try {
436 //granularity is a global variable in the form: yyyy-MM-ddTHH:mm:ssZ
437 sdf = new SimpleDateFormat(granularity);
438 date = sdf.parse(pattern);
439 } catch(Exception e) {
440 if(!default_granularity.equals(granularity)) { // try validating against default granularity
441 try {
442 date = null;
443 sdf = null;
444 sdf = new SimpleDateFormat(default_granularity);
445 date = sdf.parse(pattern);
446 } catch(Exception ex) {
447 logger.error("invalid date format: " + pattern);
448 return -1;
449 }
450 } else {
451 logger.error("invalid date format: " + pattern);
452 return -1;
453 }
454 }
455 return date.getTime();
456 }
457 /** get the string representation of a time from a long value(long type)
458 */
459 public static String getTime(long milliseconds) {
460 Date date = new Date(milliseconds);
461 SimpleDateFormat sdf = new SimpleDateFormat(granularity);
462 return sdf.format(date);
463 }
464 public static Element createResponseHeader(Document response_doc, String verb) {
465 String tag_name = (oai_version.equals(OAI_VERSION2))? OAI_PMH : verb;
466 Element oai = response_doc.createElement(tag_name);
467 Element resp_date = response_doc.createElement(RESPONSE_DATE);
468 Element req = response_doc.createElement(REQUEST);
469 oai.appendChild(resp_date);
470 oai.appendChild(req);
471
472 if(oai_version.equals(OAI_VERSION2)) {
473 oai.setAttribute("xmlns", "http://www.openarchives.org/OAI/2.0/");
474 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
475 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0/ \n http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd");
476 } else {
477 oai.setAttribute("xmlns", "http://www.openarchives.com/OAI/1.1/OAI_" + verb);
478 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
479 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/1.1/OAI_" + verb + "\n http://www.openarchives.org/OAI/1.1/OAI_" + verb + ".xsd");
480 }
481 return oai;
482 }
483 public static Element getMetadataPrefixElement(Document doc, String tag_name, String version) {
484 //examples of tag_name: dc, oai_dc:dc, etc.
485 Element oai = doc.createElement(tag_name);
486 if (version.equals(OAI_VERSION2)) {
487 oai.setAttribute("xmlns:oai_dc", "http://www.openarchives.org/OAI/2.0/oai_dc/");
488 oai.setAttribute("xmlns:dc", "http://purl.org/dc/elements/1.1/");
489 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
490 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0/oai_dc/ \n http://www.openarchives.org/OAI/2.0/oai_dc.xsd");
491 } else {
492 oai.setAttribute("xmlns", "http://www.openarchives.com/OAI/1.1/");
493 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
494 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/1.1/" + tag_name + ".xsd");
495 }
496
497 return oai;
498 }
499 public static HashMap<String, Node> getChildrenMapByTagName(Node n, String tag_name) {
500
501 HashMap<String, Node> map= new HashMap<String, Node>();
502 Node child = n.getFirstChild();
503 while (child!=null) {
504 String name = child.getNodeName();
505 if(name.equals(tag_name)) {
506 map.put(name, child);
507 }
508 child = child.getNextSibling();
509 }
510 return map;
511 }
512
513 public static Element createOAIIdentifierXML(Document doc, String repository_id, String sample_collection, String sample_doc_id) {
514 String xml = "<oai-identifier xmlns=\"http://www.openarchives.org/OAI/2.0/oai-identifier\"\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/oai-identifier\n http://www.openarchives.org/OAI/2.0/oai-identifier.xsd\">\n <scheme>oai</scheme>\n<repositoryIdentifier>" + repository_id + "</repositoryIdentifier>\n<delimiter>:</delimiter>\n<sampleIdentifier>oai:"+repository_id+":"+sample_collection+":"+sample_doc_id+"</sampleIdentifier>\n</oai-identifier>";
515
516 Document xml_doc = converter.getDOM(xml);
517 return (Element)doc.importNode(xml_doc.getDocumentElement(), true);
518
519
520 }
521
522 public static Element createGSDLElement(Document doc) {
523 String xml = "<gsdl xmlns=\"http://www.greenstone.org/namespace/gsdl_oaiinfo/1.0/gsdl_oaiinfo\"\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n xsi:schemaLocation=\"http://www.greenstone.org/namespace/gsdl_oaiinfo/1.0/gsdl_oaiinfo\n http://www.greenstone.org/namespace/gsdl_oaiinfo/1.0/gsdl_oaiinfo.xsd\"></gsdl>";
524 Document xml_doc = converter.getDOM(xml);
525 return (Element)doc.importNode(xml_doc.getDocumentElement(), true);
526
527
528 }
529
530 public static Element createSet(Document doc, String spec, String name, String description) {
531
532 Element set_elem = doc.createElement(SET);
533 Element set_spec = doc.createElement(SET_SPEC);
534 GSXML.setNodeText(set_spec, spec);
535 set_elem.appendChild(set_spec);
536 Element set_name = doc.createElement(SET_NAME);
537 GSXML.setNodeText(set_name, name);
538 set_elem.appendChild(set_name);
539 if (description != null) {
540 Element set_description = doc.createElement(SET_DESCRIPTION);
541 GSXML.setNodeText(set_description, description);
542 set_elem.appendChild(set_description);
543 }
544 return set_elem;
545
546 }
547
548 /** returns the resumptionToken element to go into an OAI response */
549 public static Element createResumptionTokenElement(Document doc, String token_name, int total_size, int cursor, long expiration_time) {
550 Element token = doc.createElement(OAIXML.RESUMPTION_TOKEN);
551 if (total_size != -1) {
552 token.setAttribute(OAIXML.COMPLETE_LIST_SIZE, "" + total_size);
553 }
554 if (cursor != -1) {
555 token.setAttribute(OAIXML.CURSOR, "" + cursor);
556 }
557 if(expiration_time !=-1) {
558 token.setAttribute(OAIXML.EXPIRATION_DATE, getTime(expiration_time));
559 }
560
561 if (token != null) {
562 GSXML.setNodeText(token, token_name);
563 }
564 return token;
565 }
566
567}
568
569
570
571
572
573
Note: See TracBrowser for help on using the repository browser.