source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/util/OAIXML.java@ 23938

Last change on this file since 23938 was 23938, checked in by ak19, 13 years ago

GS3's OAIserver passes final official oaiserver validation tests: to do with earliestDatestamp. Both the datestamp of the records (documents) returned by listRecords, listIdentifiers and getRecord, as well as the earliestDatestamp returned by an Identify request are now in sync with each other. Related code changes made to perllib to write the earliestDatestamp into GS3's buildconfig.xml (and build.cfg for GS2), and to write new fields oailastmodified and oailastmodifieddate into the collection's database for each document.

File size: 29.7 KB
Line 
1/*
2 * OAIXML.java
3 * Copyright (C) 2008 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.util;
20
21import org.greenstone.util.GlobalProperties;
22
23import org.w3c.dom.*;
24import java.io.*;
25import java.net.*;
26import java.util.*;
27import java.text.DateFormat;
28import java.text.SimpleDateFormat;
29import org.apache.xerces.parsers.*;
30import org.apache.xml.serialize.*;
31
32// SAX
33import org.xml.sax.XMLReader;
34import org.xml.sax.SAXException;
35import org.xml.sax.SAXParseException;
36import org.xml.sax.helpers.DefaultHandler;
37import org.xml.sax.InputSource;
38
39// JAXP
40import javax.xml.parsers.FactoryConfigurationError;
41import javax.xml.parsers.ParserConfigurationException;
42import javax.xml.parsers.SAXParser;
43import javax.xml.parsers.SAXParserFactory;
44
45// import file Logger.java
46import org.apache.log4j.*;
47
48/** Constants and static helper methods used by the OAI service */
49public class OAIXML {
50
51 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.GSXML.class.getName());
52
// String constants shared by the OAI code: request argument names, verb names,
// element/attribute names used in responses and config files, and a few messages.
53 // the leading keyword of the OAI protocol
54 public static final String VERB = "verb";
55
56 // the six valid OAI verbs
57 public static final String GET_RECORD = "GetRecord";
58 public static final String LIST_RECORDS = "ListRecords";
59 public static final String LIST_IDENTIFIERS = "ListIdentifiers";
60 public static final String LIST_SETS = "ListSets";
61 public static final String LIST_METADATA_FORMATS = "ListMetadataFormats";
62 public static final String IDENTIFY = "Identify";
63
64 // other valid OAI parameters
65 public static final String OAI_METADATAFORMAT = "OAIMetadataFormat";
66 public static final String METADATA_NAMESPACE = "metadataNamespace";
67 public static final String OAI_DC = "oai_dc";
68 public static final String DC = "dc";
69 public static final String METADATA_PREFIX = "metadataPrefix";
70 public static final String FROM = "from";
71 public static final String UNTIL = "until";
72 public static final String SET = "set";
73 public static final String RESUMPTION_TOKEN = "resumptionToken";
74 public static final String RESUMPTION_TOKEN_EXPIRATION = "resumptionTokenExpiration";
75 public static final String IDENTIFIER = "identifier";
76
77 public static final String USE_STYLESHEET = "useOAIStylesheet";
78 public static final String STYLESHEET = "OAIStylesheet";
79 // words used to compose OAI responses (kept in alphabetical order of the constant name)
80 public static final String ADMIN_EMAIL = "adminEmail";
81 public static final String BAD_ARGUMENT = "badArgument";
82 public static final String BAD_RESUMPTION_TOKEN = "badResumptionToken";
83 public static final String BAD_VERB = "badVerb";
84 public static final String BASE_URL = "baseURL";
85 public static final String CANNOT_DISSEMINATE_FORMAT = "cannotDisseminateFormat";
86 public static final String CODE = "code";
87 public static final String COLLECTION = "collection";
88 public static final String COLLECTION_LIST = "collectionList";
89 public static final String COMPLETE_LIST_SIZE = "completeListSize";
90 public static final String COMPRESSION = "compression";
91 public static final String CURSOR = "cursor";
92 public static final String DATESTAMP = "datestamp";
93 public static final String DELETED_RECORD = "deletedRecord";
94 public static final String DESCRIPTION = "description";
95 public static final String EARLIEST_DATESTAMP = "earliestDatestamp";
96 public static final String ERROR = "error";
97 public static final String EXPIRATION_DATE = "expirationDate";
98 public static final String GRANULARITY = "granularity";
99 public static final String GS3OAI = "GS3OAI";
100 public static final String HAS_OAI = "hasOAI";
101 public static final String HEADER = "header";
102 public static final String ILLEGAL_OAI_VERB = "Illegal OAI verb";
103 public static final String INDEX_STEM = "indexStem";
104 public static final String LASTMODIFIED = "lastmodified";
105 public static final String MAPPING = "mapping";
106 public static final String MAPPING_LIST = "mappingList";
107 public static final String MESSAGE = "message";
108 public static final String METADATA = "metadata";
109 public static final String METADATA_FORMAT = "metadataFormat";
110 public static final String NAME = "name";
111 public static final String NO_RECORDS_MATCH = "noRecordsMatch";
112 public static final String OAI = "OAI";
113 public static final String OAI_DASH_PMH = "OAI-PMH";
114 public static final String OAI_LASTMODIFIED = "oailastmodified";
115 public static final String OAIPMH = "OAIPMH";
116 public static final String OAI_RESUMPTION_TOKENS = "OAIResumptionTokens";
117 public static final String OAI_SERVICE = "oaiService";
118 public static final String OAI_SET_LIST = "oaiSetList";
119 public static final String OAI_SERVICE_UNAVAILABLE = "OAI service unavailable";
120 public static final String OID = "OID";
121 public static final String PARAM = "param";
122 public static final String PARAM_LIST = "paramList";
123 public static final String PROTOCOL_VERSION = "protocolVersion";
124 public static final String RECORD = "record";
125 public static final String REQUEST = "request";
126 public static final String REPOSITORY_NAME = "repositoryName";
127 public static final String RESPONSE = "response";
128 public static final String RESPONSE_DATE = "responseDate";
129 public static final String RESUME_AFTER = "resumeAfter";
130 public static final String SCHEMA = "schema";
131 public static final String SERVICE = "service";
132 public static final String SERVICE_UNAVAILABLE = "service unavailable";
133 public static final String SET_SPEC = "setSpec";
134 public static final String SET_NAME = "setName";
135 public static final String SET_DESCRIPTION = "setDescription";
136 public static final String SITE = "site";
137 public static final String TO = "to";
138 public static final String TYPE = "type";
139 public static final String VALUE = "value";
140
141 // Two error and exception conditions for the verb 'ListMetadataFormats'
142 public static final String ID_DOES_NOT_EXIST = "idDoesNotExist";
143 public static final String NO_METADATA_FORMATS = "noMetadataFormats";
144
145 // The node id in the collection database, which contains all the OIDs in the database
146 public static final String BROWSELIST = "browselist";
147
148 // system-dependent file separator, either '/' or '\'
149 public static final String FILE_SEPARATOR = File.separator;
150 public static final String OAI_VERSION1 = "1.0";
151 public static final String OAI_VERSION2 = "2.0";
152 /*************************above are final values****************************/
153
// Mutable, class-wide state. Everything below is a plain public static field that the
// methods of this class read and reassign.
// root element of the resumption token document; assigned in getOAIResumptionTokenXML()
154 public static Element resumption_token_elem = null;
155 // the token file on disk; assigned in getOAIResumptionTokenXML() and used when saving the token file
156 public static File resumption_token_file = null;
157 //public static ArrayList token_list = new ArrayList();
158
159 // root element of OAIConfig.xml; initialized in getOAIConfigXML()
160 public static Element oai_config_elem = null;
161
162 // stores the date format as a SimpleDateFormat pattern; getOAIConfigXML() reads it from the
     // config file and rewrites e.g. "yyyy-MM-ddTHH:mm:ssZ" into "yyyy-MM-dd'T'HH:mm:ss'Z'"
163 public static String granularity = "";
164
165 // http://www.openarchives.org/OAI/openarchivesprotocol.html#DatestampsRequests
166 // specifies that all repositories must support YYYY-MM-DD (yyyy-MM-dd in Java)
167 // this would be in addition to the other (optional) granularity of above that
168 // a repository may additionally choose to support.
169 public static final String default_granularity = "yyyy-MM-dd";
170
171 // token lifetime in seconds (getTokenExpiration() multiplies it by 1000 to get milliseconds);
     // this value is overridden in getOAIConfigXML()
172 public static long token_expiration = 7200;
173
174 /** which version of oai that this oaiserver supports; default is 2.0
175 * initialized in getOAIConfigXML()
176 */
177 public static String oai_version = "2.0";
178
179 /** response owner document: createElement() and the response builders create their elements in it */
180 public static Document response_doc = new XMLConverter().newDOM();
181
     // Parallel arrays used by oaiEncode()/oaiDecode(): special_char[i] is written as escape_sequence[i].
182 public static String[] special_char = {"/", "?", "#", "=", "&", ":", ";", " ", "%", "+"};
183 public static String[] escape_sequence = {"%2F", "%3F", "%23", "%3D", "%26", "%3A", "%3B", "%20", "%25", "%2B"};
184// /** key=special character; value=escaped sequence */
185// public static HashMap encode_map = new HashMap();
186// /** key=escaped sequence; value=special character */
187// public static HashMap decode_map = new HashMap();
188
189 public static void init() {
190 resumption_token_elem = getOAIResumptionTokenXML();
191 }
192 public static String getOAIVersion() {
193 return oai_version;
194 }
195 public static Element createElement(String tag_name) {
196 return response_doc.createElement(tag_name);
197 }
198 /**Compose a response element used when OAIPMH service sending responses thru
199 * ServiceCluster and MessageRouter, as they automatically wrap a message element
200 * on this response element
201 */
202 public static Element getResponse(Element core_msg) {
203 Element res = createElement(RESPONSE);
204 res.appendChild(response_doc.importNode(core_msg, true));
205 return res;
206 }
207 /** Read in OAIResumptionToken.xml (residing web/WEB-INF/classes/) */
208 public static Element getOAIResumptionTokenXML() {
209
210 // The system environment variable $GSDL3HOME(ends ../web) does not contain the file separator
211 resumption_token_file = new File(GlobalProperties.getGSDL3Home() + FILE_SEPARATOR +
212 "WEB-INF" + FILE_SEPARATOR + "classes" +FILE_SEPARATOR + "OAIResumptionToken.xml");
213 if (resumption_token_file.exists()) {
214 Document token_doc = parseXMLFile(resumption_token_file);
215 if (token_doc != null) {
216 resumption_token_elem = token_doc.getDocumentElement();
217 } else {
218 logger.error("Fail to parse resumption token file OAIReceptionToken.xml.");
219 return null;
220 }
221 //remove all expired tokens
222 clearExpiredTokens();
223 return resumption_token_elem;
224 }
225 //if resumption_token_file does not exist
226 logger.info("resumption token file: "+ resumption_token_file.getPath()+" not found! create an empty one.");
227 resumption_token_elem = createElement(OAI_RESUMPTION_TOKENS);
228 saveOAIResumptionTokenXML(resumption_token_elem);
229 return resumption_token_elem;
230 }
231 public static void saveOAIResumptionTokenXML(Element token_elem) {
232 if(writeXMLFile(resumption_token_file, token_elem.getOwnerDocument()) == false) {
233 logger.error("Fail to save the resumption token file");
234 }
235 }
236 public static void clearExpiredTokens() {
237 boolean token_deleted = false;
238 NodeList tokens = GSXML.getChildrenByTagName(resumption_token_elem, RESUMPTION_TOKEN);
239 for (int i=0; i<tokens.getLength(); i++) {
240 Element token_elem = (Element)tokens.item(i);
241 String expire_str = token_elem.getAttribute(EXPIRATION_DATE);
242 long datestamp = getTime(expire_str); // expire_str is in milliseconds
243 if(datestamp < System.currentTimeMillis()) {
244 resumption_token_elem.removeChild(token_elem);
245 token_elem = null;
246 token_deleted = true;
247 }
248 }
249
250 if(token_deleted) {
251 saveOAIResumptionTokenXML(resumption_token_elem);
252 }
253 }
254 public static boolean containsToken(String token) {
255 NodeList tokens = GSXML.getChildrenByTagName(resumption_token_elem, OAIXML.RESUMPTION_TOKEN);
256 for (int i=0; i<tokens.getLength(); i++) {
257 if(token.equals(GSXML.getNodeText((Element)tokens.item(i)).trim() ))
258 return true;
259 }
260 return false;
261 }
262 public static void addToken(Element token) {
263 Document doc = resumption_token_elem.getOwnerDocument();
264 resumption_token_elem.appendChild(duplicateElement(doc, token, true));
265 saveOAIResumptionTokenXML(resumption_token_elem);
266 }
267 public static void addToken(String token) {
268 Element te = resumption_token_elem.getOwnerDocument().createElement(OAIXML.RESUMPTION_TOKEN);
269 //add expiration att
270 resumption_token_elem.appendChild(te);
271 saveOAIResumptionTokenXML(resumption_token_elem);
272 }
273 public static boolean removeToken(String token) {
274 NodeList tokens = GSXML.getChildrenByTagName(resumption_token_elem, OAIXML.RESUMPTION_TOKEN);
275 int num_tokens = tokens.getLength();
276 for (int i=0; i<num_tokens; i++) {
277 Element e = (Element)(tokens.item(i));
278 if(token.equals(GSXML.getNodeText(e))) {
279 resumption_token_elem.removeChild(e);
280 saveOAIResumptionTokenXML(resumption_token_elem);
281 return true;
282 }
283 }
284 return false;
285 }
286 /** Read in OAIConfig.xml (residing web/WEB-INF/classes/) and use it to configure the receptionist etc.
287 * the oai_version variable is also set in here.
288 * The init() method is also called in here. */
289 public static Element getOAIConfigXML() {
290 init();
291
292 // The system environment variable $GSDL3HOME(ends ../web) does not contain the file separator
293 File oai_config_file = new File(GlobalProperties.getGSDL3Home() + FILE_SEPARATOR +
294 "WEB-INF" + FILE_SEPARATOR + "classes" +FILE_SEPARATOR + "OAIConfig.xml");
295 if (!oai_config_file.exists()) {
296 logger.error(" oai config file: "+oai_config_file.getPath()+" not found!");
297 return null;
298 }
299 Document oai_config_doc = parseXMLFile(oai_config_file);
300 if (oai_config_doc != null) {
301 oai_config_elem = oai_config_doc.getDocumentElement();
302 } else {
303 logger.error("Fail to parse oai config file OAIConfig.xml.");
304 return null;
305 }
306
307 //initialize oai_version
308 Element protocol_version = (Element)GSXML.getChildByTagName(oai_config_elem, PROTOCOL_VERSION);
309 oai_version = GSXML.getNodeText(protocol_version).trim();
310
311 //initialize token_expiration
312 Element expiration = (Element)GSXML.getChildByTagName(oai_config_elem, RESUMPTION_TOKEN_EXPIRATION);
313 String expire_str = GSXML.getNodeText(expiration).trim();
314 if (expiration != null && !expire_str.equals("")) {
315 token_expiration = Long.parseLong(expire_str);
316 }
317
318 // read granularity from the config file
319 Element granu_elem = (Element)GSXML.getChildByTagName(oai_config_elem, GRANULARITY);
320 //initialize the granu_str which might be used by other methods (eg, getDate())
321 granularity = GSXML.getNodeText(granu_elem).trim();
322
323 //change "yyyy-MM-ddTHH:mm:ssZ" to "yyyy-MM-dd'T'HH:mm:ss'Z'"
324 granularity = granularity.replaceAll("T", "'T'");
325 granularity = granularity.replaceAll("Z", "'Z'");
326 granularity = granularity.replaceAll("YYYY", "yyyy").replaceAll("DD", "dd").replaceAll("hh", "HH");
327 return oai_config_elem;
328 }
329
330 public static String[] getMetadataMapping(Element metadata_format) {
331
332 if (metadata_format == null) {
333 return null;
334 }
335 NodeList mappings = metadata_format.getElementsByTagName(MAPPING);
336 int size = mappings.getLength();
337 if (size == 0) {
338 logger.info("No metadata mappings are provided in OAIConfig.xml.");
339 return null;
340 }
341 String[] names = new String[size];
342 for (int i=0; i<size; i++) {
343 names[i] = GSXML.getNodeText((Element)mappings.item(i)).trim();
344 }
345 return names;
346
347 }
348
349 public static String[] getGlobalMetadataMapping(String prefix) {
350 Element list_meta_formats = (Element)GSXML.getChildByTagName(oai_config_elem, LIST_METADATA_FORMATS);
351 if(list_meta_formats == null) {
352 return null;
353 }
354 Element metadata_format = GSXML.getNamedElement(list_meta_formats, METADATA_FORMAT, METADATA_PREFIX, prefix);
355 if(metadata_format == null) {
356 return null;
357 }
358 return getMetadataMapping(metadata_format);
359 }
360
361
362 public static long getTokenExpiration() {
363 return token_expiration*1000; // in milliseconds
364 }
365
366 /** TODO: returns a basic response for appropriate oai version
367 *
368 */
369 public static Element createBasicResponse(String verb, String[] pairs) {
370
371 Element response = createResponseHeader(verb);
372
373 //set the responseDate and request elements accordingly
374 Element request_elem = (Element)GSXML.getChildByTagName(response, REQUEST);
375 if (verb.equals("")) {
376 request_elem.setAttribute(VERB, verb);
377 }
378 int num_pairs = (pairs==null)? 0 : pairs.length;
379 for (int i=num_pairs - 1; i>=0; i--) {
380 int index = pairs[i].indexOf("=");
381 if (index != -1) {
382 String[] strs = pairs[i].split("=");
383 if(strs != null && strs.length == 2) {
384 request_elem.setAttribute(strs[0], oaiDecode(strs[1]));
385 }
386 }
387 }//end of for()
388 Element base_url_elem = (Element)GSXML.getChildByTagName(oai_config_elem, BASE_URL);
389 String base_url = GSXML.getNodeText(base_url_elem);
390 GSXML.setNodeText(request_elem, base_url);
391
392 Node resp_date = GSXML.getChildByTagName(response, RESPONSE_DATE);
393 if (resp_date != null) {
394 GSXML.setNodeText((Element)resp_date, getCurrentUTCTime());
395 }
396
397 return response;
398 }
399 /** @param error_code the value of the code attribute
400 * @param error_text the node text of the error element
401 * @return an oai error element
402 * Used by receptionist
403 */
404 public static Element createErrorElement(String error_code, String error_text) {
405 Element error = createElement(ERROR);
406 error.setAttribute(CODE, error_code);
407 GSXML.setNodeText(error, error_text);
408 return error;
409 }
410
411 /** convert the escaped sequences (eg, '%3A') of those special characters back to their
412 * original form (eg, ':').
413 */
414 public static String oaiDecode(String escaped_str) {
415 logger.info("oaiDecode() " +escaped_str);
416 for (int i=0; i<special_char.length; i++) {
417 if (escaped_str.indexOf(escape_sequence[i]) != -1) {
418 escaped_str = escaped_str.replaceAll(escape_sequence[i], special_char[i]);
419 }
420 }
421 //escaped_str = escaped_str.replaceAll("%3A", ":");
422 return escaped_str;
423 }
424 /** convert those special characters (eg, ':') to their
425 * escaped sequences (eg, '%3A').
426 */
427 public static String oaiEncode(String original_str) {
428 logger.info("oaiEncode() " + original_str);
429 for (int i=0; i<special_char.length; i++) {
430 if (original_str.indexOf(special_char[i]) != -1) {
431 original_str = original_str.replaceAll(special_char[i], escape_sequence[i]);
432 }
433 }
434 //original_str = original_str.replaceAll(":", "%3A");
435 return original_str;
436 }
437 /** convert YYYY-MM_DDThh:mm:ssZ to yyyy-MM-ddTHH:mm:ssZ
438 */
439 public static String convertToJava(String oai_format) {
440 oai_format = oai_format.replaceAll("YYYY", "yyyy").replaceAll("DD", "dd").replaceAll("hh", "HH");
441 return oai_format;
442 }
443 /** convert yyyy-MM-ddTHH:mm:ssZ to YYYY-MM_DDThh:mm:ssZ
444 */
445 public static String convertToOAI(String java_format) {
446 java_format = java_format.replaceAll("yyyy", "YYYY").replaceAll("dd", "DD").replaceAll("HH", "hh");
447 return java_format;
448 }
449 public static String getCurrentUTCTime() {
450 Date current_utc = new Date(System.currentTimeMillis());
451 //granularity is in the form: yyyy-MM-dd'T'HH:mm:ss'Z '
452 DateFormat formatter = new SimpleDateFormat(granularity);
453 return formatter.format(current_utc);
454 }
455 /** get a Date object from a Date format pattern string
456 *
457 * @param pattern - in the form: 2007-06-14T16:48:25Z, for example.
458 * @return a Date object - null if the pattern is not in the specified form
459 */
460
461 public static Date getDate(String pattern) {
462 if (pattern == null || pattern.equals("")) {
463 return null;
464 }
465 Date date = null;
466// String str = pattern.replaceAll("T", " ");
467// str = str.replaceAll("Z", "");
468 SimpleDateFormat sdf = null;
469 try {
470 sdf = new SimpleDateFormat(granularity);
471 date = sdf.parse(pattern);
472 } catch(Exception e) {
473 if(!default_granularity.equals(granularity)) { // try validating against default granularity
474 try {
475 date = null;
476 sdf = null;
477 sdf = new SimpleDateFormat(default_granularity);
478 date = sdf.parse(pattern);
479 } catch(Exception ex) {
480 logger.error("invalid date format: " + pattern);
481 return null;
482 }
483 } else {
484 logger.error("invalid date format: " + pattern);
485 return null;
486 }
487 }
488 return date;
489 }
490 /** get the million second value from a string representing time in a pattern
491 * (eg, 2007-06-14T16:48:25Z)
492 */
493 public static long getTime(String pattern) {
494 if (pattern == null || pattern.equals("")) {
495 return -1;
496 }
497 Date date = null;
498 SimpleDateFormat sdf = null;
499 try {
500 //granularity is a global variable in the form: yyyy-MM-ddTHH:mm:ssZ
501 sdf = new SimpleDateFormat(granularity);
502 date = sdf.parse(pattern);
503 } catch(Exception e) {
504 if(!default_granularity.equals(granularity)) { // try validating against default granularity
505 try {
506 date = null;
507 sdf = null;
508 sdf = new SimpleDateFormat(default_granularity);
509 date = sdf.parse(pattern);
510 } catch(Exception ex) {
511 logger.error("invalid date format: " + pattern);
512 return -1;
513 }
514 } else {
515 logger.error("invalid date format: " + pattern);
516 return -1;
517 }
518 }
519 return date.getTime();
520 }
521 /** get the string representation of a time from a long value(long type)
522 */
523 public static String getTime(long milliseconds) {
524 Date date = new Date(milliseconds);
525 SimpleDateFormat sdf = new SimpleDateFormat(granularity);
526 return sdf.format(date);
527 }
528 public static Element createResponseHeader(String verb) {
529 String tag_name = (oai_version.equals(OAI_VERSION2))? OAI_DASH_PMH : verb;
530 Element oai = response_doc.createElement(tag_name);
531 Element resp_date = response_doc.createElement(RESPONSE_DATE);
532 Element req = response_doc.createElement(REQUEST);
533 oai.appendChild(resp_date);
534 oai.appendChild(req);
535
536 if(oai_version.equals(OAI_VERSION2)) {
537 oai.setAttribute("xmlns", "http://www.openarchives.org/OAI/2.0/");
538 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
539 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0/ \n http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd");
540 } else {
541 oai.setAttribute("xmlns", "http://www.openarchives.com/OAI/1.1/OAI_" + verb);
542 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
543 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/1.1/OAI_" + verb + "\n http://www.openarchives.org/OAI/1.1/OAI_" + verb + ".xsd");
544 }
545 return oai;
546 }
547 public static Element getMetadataPrefixElement(String tag_name, String version) {
548 //examples of tag_name: dc, oai_dc:dc, etc.
549 Element oai = response_doc.createElement(tag_name);
550 if (version.equals(OAI_VERSION2)) {
551 oai.setAttribute("xmlns:oai_dc", "http://www.openarchives.org/OAI/2.0/oai_dc/");
552 oai.setAttribute("xmlns:dc", "http://purl.org/dc/elements/1.1/");
553 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
554 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0/oai_dc/ \n http://www.openarchives.org/OAI/2.0/oai_dc.xsd");
555 } else {
556 oai.setAttribute("xmlns", "ttp://www.openarchives.com/OAI/1.1/");
557 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
558 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/1.1/" + tag_name + ".xsd");
559 }
560
561 return oai;
562 }
563 public static HashMap getChildrenMapByTagName(Node n, String tag_name) {
564
565 HashMap map= new HashMap();
566 Node child = n.getFirstChild();
567 while (child!=null) {
568 String name = child.getNodeName();
569 if(name.equals(tag_name)) {
570 map.put(name, child);
571 }
572 child = child.getNextSibling();
573 }
574 return map;
575 }
576
577 /** Duplicates an element */
578 public static Element duplicateElement (Document owner, Element element, boolean with_attributes) {
579 return duplicateElementNS (owner, element, null, with_attributes);
580 }
581
582 /** Duplicates an element */
583 public static Element duplicateElementNS (Document owner,
584 Element element,
585 String namespace_uri,
586 boolean with_attributes) {
587 Element duplicate;
588 if (namespace_uri == null) {
589 duplicate = owner.createElement (element.getTagName ());
590 } else {
591 duplicate = owner.createElementNS (namespace_uri, element.getTagName ());
592 }
593 // Copy element attributes
594 if (with_attributes) {
595 NamedNodeMap attributes = element.getAttributes ();
596 for (int i = 0; i < attributes.getLength (); i++) {
597 Node attribute = attributes.item (i);
598 duplicate.setAttribute (attribute.getNodeName (), attribute.getNodeValue ());
599 }
600 }
601
602 // Copy element children
603 NodeList children = element.getChildNodes ();
604 for (int i = 0; i < children.getLength (); i++) {
605 Node child = children.item (i);
606 duplicate.appendChild (owner.importNode (child, true));
607 }
608
609 return duplicate;
610 }
611
612 public static void copyElement(Element to, Element from, String elem_name) {
613
614 Document to_doc = to.getOwnerDocument();
615 Node child = from.getFirstChild();
616 while (child != null) {
617 if (child.getNodeName().equals(elem_name)) {
618 to.appendChild(to_doc.importNode(child, true));
619 return;
620 }
621 child = child.getNextSibling();
622 }
623 }
624 public static HashMap getParamMap(NodeList params) {
625 HashMap map = new HashMap();
626 for(int i=0; i<params.getLength(); i++) {
627 Element param = (Element)params.item(i);
628 String param_name = param.getAttribute(OAIXML.NAME);
629 String param_value = param.getAttribute(OAIXML.VALUE);
630 map.put(param_name, param_value);
631 }
632 return map;
633 }
634 /** Parse an XML document from a given file */
635 static public Document parseXMLFile (File xml_file) {
636 // No file? No point trying!
637 if (xml_file.exists () == false) {
638 return null;
639 }
640 Document doc = null;
641 try {
642 doc = parseXML (new FileInputStream (xml_file));
643 }
644 catch (Exception exception) {
645 logger.error(exception.toString());
646 return null;
647 }
648 return doc;
649 }
650
651
652 /** Parse an XML document from a given input stream */
653 static public Document parseXML (InputStream xml_input_stream) {
654 Document document = null;
655
656 try {
657 InputStreamReader isr = new InputStreamReader (xml_input_stream, "UTF-8");
658 Reader xml_reader = new BufferedReader (isr);
659 document = parseXML (xml_reader);
660 isr.close ();
661 xml_input_stream.close ();
662 }
663 catch (Exception exception) {
664 logger.error(exception.toString());
665 }
666
667 return document;
668 }
669
670 /** Parse an XML document from a given reader */
671 static public Document parseXML (Reader xml_reader) {
672 Document document = null;
673
674 try {
675 InputSource isc = new InputSource (xml_reader);
676 DOMParser parser = new DOMParser ();
677 parser.setFeature ("http://xml.org/sax/features/validation", false);
678 parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
679 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
680 parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true);
681 parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
682 parser.parse (isc);
683 document = parser.getDocument ();
684 }
685 catch (SAXException exception) {
686 System.err.println ("SAX exception: " + exception.getMessage ());
687 logger.error(exception.toString());
688 }
689 catch (Exception exception) {
690 logger.error(exception.toString());
691 }
692
693 return document;
694 }
695 /** Write an XML document to a given file */
696 static public boolean writeXMLFile (File xml_file, Document document) {
697 try {
698 OutputStream os = new FileOutputStream (xml_file);
699 // Create an output format for our document.
700 OutputFormat f = new OutputFormat (document);
701 f.setEncoding ("UTF-8");
702 f.setIndenting (true);
703 f.setLineWidth (0); // Why isn't this working!
704 f.setPreserveSpace (false);
705 // Create the necessary writer stream for serialization.
706 OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8");
707 Writer w = new BufferedWriter (osw);
708 // Generate a new serializer from the above.
709 XMLSerializer s = new XMLSerializer (w, f);
710 s.asDOMSerializer ();
711 // Finally serialize the document to file.
712 s.serialize (document);
713 // And close.
714 os.close ();
715 return true;
716 }
717 catch (Exception exception) {
718 logger.error(exception.toString());
719 return false;
720 }
721 }
722
723
724}
725
726
727
728
729
730
Note: See TracBrowser for help on using the repository browser.