source: main/branches/64_bit_Greenstone/greenstone3/src/java/org/greenstone/gsdl3/util/OAIXML.java@ 24007

Last change on this file since 24007 was 24007, checked in by sjm84, 13 years ago

Updating this branch to match the latest Greenstone3 changes

File size: 29.7 KB
Line 
1/*
2 * OAIXML.java
3 * Copyright (C) 2008 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.util;
20
21import org.greenstone.util.GlobalProperties;
22
23import org.w3c.dom.*;
24import java.io.*;
25import java.net.*;
26import java.util.*;
27import java.text.DateFormat;
28import java.text.SimpleDateFormat;
29import org.apache.xerces.parsers.*;
30import org.apache.xml.serialize.*;
31
32// SAX
33import org.xml.sax.XMLReader;
34import org.xml.sax.SAXException;
35import org.xml.sax.SAXParseException;
36import org.xml.sax.helpers.DefaultHandler;
37import org.xml.sax.InputSource;
38
39// JAXP
40import javax.xml.parsers.FactoryConfigurationError;
41import javax.xml.parsers.ParserConfigurationException;
42import javax.xml.parsers.SAXParser;
43import javax.xml.parsers.SAXParserFactory;
44
45// import file Logger.java
46import org.apache.log4j.*;
47
48/** these constants are used for the OAI service */
49public class OAIXML {
50
51 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.GSXML.class.getName());
52
53 // the leading keyword of oai protocol
54 public static final String VERB = "verb";
55
56 // six valid oai verbs
57 public static final String GET_RECORD = "GetRecord";
58 public static final String LIST_RECORDS = "ListRecords";
59 public static final String LIST_IDENTIFIERS = "ListIdentifiers";
60 public static final String LIST_SETS = "ListSets";
61 public static final String LIST_METADATA_FORMATS = "ListMetadataFormats";
62 public static final String IDENTIFY = "Identify";
63
64 // other valid oai parameters
65 public static final String OAI_METADATAFORMAT = "OAIMetadataFormat";
66 public static final String METADATA_NAMESPACE = "metadataNamespace";
67 public static final String OAI_DC = "oai_dc";
68 public static final String DC = "dc";
69 public static final String METADATA_PREFIX = "metadataPrefix";
70 public static final String FROM = "from";
71 public static final String UNTIL = "until";
72 public static final String SET = "set";
73 public static final String RESUMPTION_TOKEN = "resumptionToken";
74 public static final String RESUMPTION_TOKEN_EXPIRATION = "resumptionTokenExpiration";
75 public static final String IDENTIFIER = "identifier";
76
77 public static final String USE_STYLESHEET = "useOAIStylesheet";
78 public static final String STYLESHEET = "OAIStylesheet";
79 // words used to compose oai responses
80 public static final String ADMIN_EMAIL = "adminEmail";
81 public static final String BAD_ARGUMENT = "badArgument";
82 public static final String BAD_RESUMPTION_TOKEN = "badResumptionToken";
83 public static final String BAD_VERB = "badVerb";
84 public static final String BASE_URL = "baseURL";
85 public static final String CANNOT_DISSEMINATE_FORMAT = "cannotDisseminateFormat";
86 public static final String CODE = "code";
87 public static final String COLLECTION = "collection";
88 public static final String COLLECTION_LIST = "collectionList";
89 public static final String COMPLETE_LIST_SIZE = "completeListSize";
90 public static final String COMPRESSION = "compression";
91 public static final String CURSOR = "cursor";
92 public static final String DATESTAMP = "datestamp";
93 public static final String DELETED_RECORD = "deletedRecord";
94 public static final String DESCRIPTION = "description";
95 public static final String EARLIEST_DATESTAMP = "earliestDatestamp";
96 public static final String ERROR = "error";
97 public static final String EXPIRATION_DATE = "expirationDate";
98 public static final String GRANULARITY = "granularity";
99 public static final String GS3OAI = "GS3OAI";
100 public static final String HAS_OAI = "hasOAI";
101 public static final String HEADER = "header";
102 public static final String ILLEGAL_OAI_VERB = "Illegal OAI verb";
103 public static final String INDEX_STEM = "indexStem";
104 public static final String LASTMODIFIED = "lastmodified";
105 public static final String MAPPING = "mapping";
106 public static final String MAPPING_LIST = "mappingList";
107 public static final String MESSAGE = "message";
108 public static final String METADATA = "metadata";
109 public static final String METADATA_FORMAT = "metadataFormat";
110 public static final String NAME = "name";
111 public static final String NO_RECORDS_MATCH = "noRecordsMatch";
112 public static final String OAI = "OAI";
113 public static final String OAI_DASH_PMH = "OAI-PMH";
114 public static final String OAI_LASTMODIFIED = "oailastmodified";
115 public static final String OAIPMH = "OAIPMH";
116 public static final String OAI_RESUMPTION_TOKENS = "OAIResumptionTokens";
117 public static final String OAI_SERVICE = "oaiService";
118 public static final String OAI_SET_LIST = "oaiSetList";
119 public static final String OAI_SERVICE_UNAVAILABLE = "OAI service unavailable";
120 public static final String OID = "OID";
121 public static final String PARAM = "param";
122 public static final String PARAM_LIST = "paramList";
123 public static final String PROTOCOL_VERSION = "protocolVersion";
124 public static final String RECORD = "record";
125 public static final String REQUEST = "request";
126 public static final String REPOSITORY_NAME = "repositoryName";
127 public static final String RESPONSE = "response";
128 public static final String RESPONSE_DATE = "responseDate";
129 public static final String RESUME_AFTER = "resumeAfter";
130 public static final String SCHEMA = "schema";
131 public static final String SERVICE = "service";
132 public static final String SERVICE_UNAVAILABLE = "service unavailable";
133 public static final String SET_SPEC = "setSpec";
134 public static final String SET_NAME = "setName";
135 public static final String SET_DESCRIPTION = "setDescription";
136 public static final String SITE = "site";
137 public static final String TO = "to";
138 public static final String TYPE = "type";
139 public static final String VALUE = "value";
140
141 //Two error and exception conditions for the verb 'ListMetadataFormats'
142 public static final String ID_DOES_NOT_EXIST = "idDoesNotExist";
143 public static final String NO_METADATA_FORMATS = "noMetadataFormats";
144
145 // The node id in the collection database, which contains all the OIDs in the database
146 public static final String BROWSELIST = "browselist";
147
148 //system-dependent file separator, maybe '/' or '\'
149 public static final String FILE_SEPARATOR = File.separator;
150 public static final String OAI_VERSION1 = "1.0";
151 public static final String OAI_VERSION2 = "2.0";
152 /*************************above are final values****************************/
153
154 public static Element resumption_token_elem = null;
155 //used when saving the token file
156 public static File resumption_token_file = null;
157 //public static ArrayList token_list = new ArrayList();
158
159 //initialized in getOAIConfigXML()
160 public static Element oai_config_elem = null;
161
162 //stores the date format "yyyy-MM-ddTHH:mm:ssZ"
163 public static String granularity = "";
164
165 // http://www.openarchives.org/OAI/openarchivesprotocol.html#DatestampsRequests
166 // specifies that all repositories must support YYYY-MM-DD (yyyy-MM-dd in Java)
167 // this would be in addition to the other (optional) granularity of above that
168 // a repository may additionally choose to support.
169 public static final String default_granularity = "yyyy-MM-dd";
170
171 //this value is overriden in getOAIConfigXML()
172 public static long token_expiration = 7200;
173
174 /** which version of oai that this oaiserver supports; default is 2.0
175 * initialized in getOAIConfigXML()
176 */
177 public static String oai_version = "2.0";
178
179 /**response owner document */
180 public static Document response_doc = new XMLConverter().newDOM();
181
182 public static String[] special_char = {"/", "?", "#", "=", "&", ":", ";", " ", "%", "+"};
183 public static String[] escape_sequence = {"%2F", "%3F", "%23", "%3D", "%26", "%3A", "%3B", "%20", "%25", "%2B"};
184// /** key=special character; value=escaped sequence */
185// public static HashMap encode_map = new HashMap();
186// /** key=escaped sequence; value=special character */
187// public static HashMap decode_map = new HashMap();
188
189 public static void init() {
190 resumption_token_elem = getOAIResumptionTokenXML();
191 }
192 public static String getOAIVersion() {
193 return oai_version;
194 }
195 public static Element createElement(String tag_name) {
196 return response_doc.createElement(tag_name);
197 }
198 /**Compose a response element used when OAIPMH service sending responses thru
199 * ServiceCluster and MessageRouter, as they automatically wrap a message element
200 * on this response element
201 */
202 public static Element getResponse(Element core_msg) {
203 Element res = createElement(RESPONSE);
204 res.appendChild(response_doc.importNode(core_msg, true));
205 return res;
206 }
207 /** Read in OAIResumptionToken.xml (residing web/WEB-INF/classes/) */
208 public static Element getOAIResumptionTokenXML() {
209
210 // The system environment variable $GSDL3HOME(ends ../web) does not contain the file separator
211 resumption_token_file = new File(GlobalProperties.getGSDL3Home() + FILE_SEPARATOR +
212 "WEB-INF" + FILE_SEPARATOR + "classes" +FILE_SEPARATOR + "OAIResumptionToken.xml");
213 if (resumption_token_file.exists()) {
214 Document token_doc = parseXMLFile(resumption_token_file);
215 if (token_doc != null) {
216 resumption_token_elem = token_doc.getDocumentElement();
217 } else {
218 logger.error("Fail to parse resumption token file OAIReceptionToken.xml.");
219 return null;
220 }
221 //remove all expired tokens
222 clearExpiredTokens();
223 return resumption_token_elem;
224 }
225 //if resumption_token_file does not exist
226 logger.info("resumption token file: "+ resumption_token_file.getPath()+" not found! create an empty one.");
227 resumption_token_elem = createElement(OAI_RESUMPTION_TOKENS);
228 saveOAIResumptionTokenXML(resumption_token_elem);
229 return resumption_token_elem;
230 }
231 public static void saveOAIResumptionTokenXML(Element token_elem) {
232 if(writeXMLFile(resumption_token_file, token_elem.getOwnerDocument()) == false) {
233 logger.error("Fail to save the resumption token file");
234 }
235 }
236 public static void clearExpiredTokens() {
237 boolean token_deleted = false;
238 NodeList tokens = GSXML.getChildrenByTagName(resumption_token_elem, RESUMPTION_TOKEN);
239 for (int i=0; i<tokens.getLength(); i++) {
240 Element token_elem = (Element)tokens.item(i);
241 String expire_str = token_elem.getAttribute(EXPIRATION_DATE);
242 long datestamp = getTime(expire_str); // expire_str is in milliseconds
243 if(datestamp < System.currentTimeMillis()) {
244 resumption_token_elem.removeChild(token_elem);
245 token_elem = null;
246 token_deleted = true;
247 }
248 }
249
250 if(token_deleted) {
251 saveOAIResumptionTokenXML(resumption_token_elem);
252 }
253 }
254 public static boolean containsToken(String token) {
255 NodeList tokens = GSXML.getChildrenByTagName(resumption_token_elem, OAIXML.RESUMPTION_TOKEN);
256 for (int i=0; i<tokens.getLength(); i++) {
257 if(token.equals(GSXML.getNodeText((Element)tokens.item(i)).trim() ))
258 return true;
259 }
260 return false;
261 }
262 public static void addToken(Element token) {
263 Document doc = resumption_token_elem.getOwnerDocument();
264 resumption_token_elem.appendChild(duplicateElement(doc, token, true));
265 saveOAIResumptionTokenXML(resumption_token_elem);
266 }
267 public static void addToken(String token) {
268 Element te = resumption_token_elem.getOwnerDocument().createElement(OAIXML.RESUMPTION_TOKEN);
269 //add expiration att
270 resumption_token_elem.appendChild(te);
271 saveOAIResumptionTokenXML(resumption_token_elem);
272 }
273 public static boolean removeToken(String token) {
274 NodeList tokens = GSXML.getChildrenByTagName(resumption_token_elem, OAIXML.RESUMPTION_TOKEN);
275 int num_tokens = tokens.getLength();
276 for (int i=0; i<num_tokens; i++) {
277 Element e = (Element)(tokens.item(i));
278 if(token.equals(GSXML.getNodeText(e))) {
279 resumption_token_elem.removeChild(e);
280 saveOAIResumptionTokenXML(resumption_token_elem);
281 return true;
282 }
283 }
284 return false;
285 }
286 /** Read in OAIConfig.xml (residing web/WEB-INF/classes/) and use it to configure the receptionist etc.
287 * the oai_version variable is also set in here.
288 * The init() method is also called in here. */
289 public static Element getOAIConfigXML() {
290 init();
291
292 // The system environment variable $GSDL3HOME(ends ../web) does not contain the file separator
293 File oai_config_file = new File(GlobalProperties.getGSDL3Home() + FILE_SEPARATOR +
294 "WEB-INF" + FILE_SEPARATOR + "classes" +FILE_SEPARATOR + "OAIConfig.xml");
295 if (!oai_config_file.exists()) {
296 logger.error(" oai config file: "+oai_config_file.getPath()+" not found!");
297 return null;
298 }
299 Document oai_config_doc = parseXMLFile(oai_config_file);
300 if (oai_config_doc != null) {
301 oai_config_elem = oai_config_doc.getDocumentElement();
302 } else {
303 logger.error("Fail to parse oai config file OAIConfig.xml.");
304 return null;
305 }
306
307 //initialize oai_version
308 Element protocol_version = (Element)GSXML.getChildByTagName(oai_config_elem, PROTOCOL_VERSION);
309 oai_version = GSXML.getNodeText(protocol_version).trim();
310
311 //initialize token_expiration
312 Element expiration = (Element)GSXML.getChildByTagName(oai_config_elem, RESUMPTION_TOKEN_EXPIRATION);
313 String expire_str = GSXML.getNodeText(expiration).trim();
314 if (expiration != null && !expire_str.equals("")) {
315 token_expiration = Long.parseLong(expire_str);
316 }
317
318 // read granularity from the config file
319 Element granu_elem = (Element)GSXML.getChildByTagName(oai_config_elem, GRANULARITY);
320 //initialize the granu_str which might be used by other methods (eg, getDate())
321 granularity = GSXML.getNodeText(granu_elem).trim();
322
323 //change "yyyy-MM-ddTHH:mm:ssZ" to "yyyy-MM-dd'T'HH:mm:ss'Z'"
324 granularity = granularity.replaceAll("T", "'T'");
325 granularity = granularity.replaceAll("Z", "'Z'");
326 granularity = granularity.replaceAll("YYYY", "yyyy").replaceAll("DD", "dd").replaceAll("hh", "HH");
327 return oai_config_elem;
328 }
329
330 public static String[] getMetadataMapping(Element metadata_format) {
331
332 if (metadata_format == null) {
333 return null;
334 }
335 NodeList mappings = metadata_format.getElementsByTagName(MAPPING);
336 int size = mappings.getLength();
337 if (size == 0) {
338 logger.info("No metadata mappings are provided in OAIConfig.xml.");
339 return null;
340 }
341 String[] names = new String[size];
342 for (int i=0; i<size; i++) {
343 names[i] = GSXML.getNodeText((Element)mappings.item(i)).trim();
344 }
345 return names;
346
347 }
348
349 public static String[] getGlobalMetadataMapping(String prefix) {
350 Element list_meta_formats = (Element)GSXML.getChildByTagName(oai_config_elem, LIST_METADATA_FORMATS);
351 if(list_meta_formats == null) {
352 return null;
353 }
354 Element metadata_format = GSXML.getNamedElement(list_meta_formats, METADATA_FORMAT, METADATA_PREFIX, prefix);
355 if(metadata_format == null) {
356 return null;
357 }
358 return getMetadataMapping(metadata_format);
359 }
360
361
362 public static long getTokenExpiration() {
363 return token_expiration*1000; // in milliseconds
364 }
365
366 /** TODO: returns a basic response for appropriate oai version
367 *
368 */
369 public static Element createBasicResponse(String verb, String[] pairs) {
370
371 Element response = createResponseHeader(verb);
372
373 //set the responseDate and request elements accordingly
374 Element request_elem = (Element)GSXML.getChildByTagName(response, REQUEST);
375 if (verb.equals("")) {
376 request_elem.setAttribute(VERB, verb);
377 }
378 int num_pairs = (pairs==null)? 0 : pairs.length;
379 for (int i=num_pairs - 1; i>=0; i--) {
380 int index = pairs[i].indexOf("=");
381 if (index != -1) {
382 String[] strs = pairs[i].split("=");
383 if(strs != null && strs.length == 2) {
384 request_elem.setAttribute(strs[0], oaiDecode(strs[1]));
385 }
386 }
387 }//end of for()
388 Element base_url_elem = (Element)GSXML.getChildByTagName(oai_config_elem, BASE_URL);
389 String base_url = GSXML.getNodeText(base_url_elem);
390 GSXML.setNodeText(request_elem, base_url);
391
392 Node resp_date = GSXML.getChildByTagName(response, RESPONSE_DATE);
393 if (resp_date != null) {
394 GSXML.setNodeText((Element)resp_date, getCurrentUTCTime());
395 }
396
397 return response;
398 }
399 /** @param error_code the value of the code attribute
400 * @param error_text the node text of the error element
401 * @return an oai error element
402 * Used by receptionist
403 */
404 public static Element createErrorElement(String error_code, String error_text) {
405 Element error = createElement(ERROR);
406 error.setAttribute(CODE, error_code);
407 GSXML.setNodeText(error, error_text);
408 return error;
409 }
410
411 /** convert the escaped sequences (eg, '%3A') of those special characters back to their
412 * original form (eg, ':').
413 */
414 public static String oaiDecode(String escaped_str) {
415 logger.info("oaiDecode() " +escaped_str);
416 for (int i=0; i<special_char.length; i++) {
417 if (escaped_str.indexOf(escape_sequence[i]) != -1) {
418 escaped_str = escaped_str.replaceAll(escape_sequence[i], special_char[i]);
419 }
420 }
421 //escaped_str = escaped_str.replaceAll("%3A", ":");
422 return escaped_str;
423 }
424 /** convert those special characters (eg, ':') to their
425 * escaped sequences (eg, '%3A').
426 */
427 public static String oaiEncode(String original_str) {
428 logger.info("oaiEncode() " + original_str);
429 for (int i=0; i<special_char.length; i++) {
430 if (original_str.indexOf(special_char[i]) != -1) {
431 original_str = original_str.replaceAll(special_char[i], escape_sequence[i]);
432 }
433 }
434 //original_str = original_str.replaceAll(":", "%3A");
435 return original_str;
436 }
437 /** convert YYYY-MM_DDThh:mm:ssZ to yyyy-MM-ddTHH:mm:ssZ
438 */
439 public static String convertToJava(String oai_format) {
440 oai_format = oai_format.replaceAll("YYYY", "yyyy").replaceAll("DD", "dd").replaceAll("hh", "HH");
441 return oai_format;
442 }
443 /** convert yyyy-MM-ddTHH:mm:ssZ to YYYY-MM_DDThh:mm:ssZ
444 */
445 public static String convertToOAI(String java_format) {
446 java_format = java_format.replaceAll("yyyy", "YYYY").replaceAll("dd", "DD").replaceAll("HH", "hh");
447 return java_format;
448 }
449 public static String getCurrentUTCTime() {
450 Date current_utc = new Date(System.currentTimeMillis());
451 //granularity is in the form: yyyy-MM-dd'T'HH:mm:ss'Z '
452 DateFormat formatter = new SimpleDateFormat(granularity);
453 return formatter.format(current_utc);
454 }
455 /** get a Date object from a Date format pattern string
456 *
457 * @param pattern - in the form: 2007-06-14T16:48:25Z, for example.
458 * @return a Date object - null if the pattern is not in the specified form
459 */
460
461 public static Date getDate(String pattern) {
462 if (pattern == null || pattern.equals("")) {
463 return null;
464 }
465 Date date = null;
466// String str = pattern.replaceAll("T", " ");
467// str = str.replaceAll("Z", "");
468 SimpleDateFormat sdf = null;
469 try {
470 sdf = new SimpleDateFormat(granularity);
471 date = sdf.parse(pattern);
472 } catch(Exception e) {
473 if(!default_granularity.equals(granularity)) { // try validating against default granularity
474 try {
475 date = null;
476 sdf = null;
477 sdf = new SimpleDateFormat(default_granularity);
478 date = sdf.parse(pattern);
479 } catch(Exception ex) {
480 logger.error("invalid date format: " + pattern);
481 return null;
482 }
483 } else {
484 logger.error("invalid date format: " + pattern);
485 return null;
486 }
487 }
488 return date;
489 }
490 /** get the million second value from a string representing time in a pattern
491 * (eg, 2007-06-14T16:48:25Z)
492 */
493 public static long getTime(String pattern) {
494 if (pattern == null || pattern.equals("")) {
495 return -1;
496 }
497 Date date = null;
498 SimpleDateFormat sdf = null;
499 try {
500 //granularity is a global variable in the form: yyyy-MM-ddTHH:mm:ssZ
501 sdf = new SimpleDateFormat(granularity);
502 date = sdf.parse(pattern);
503 } catch(Exception e) {
504 if(!default_granularity.equals(granularity)) { // try validating against default granularity
505 try {
506 date = null;
507 sdf = null;
508 sdf = new SimpleDateFormat(default_granularity);
509 date = sdf.parse(pattern);
510 } catch(Exception ex) {
511 logger.error("invalid date format: " + pattern);
512 return -1;
513 }
514 } else {
515 logger.error("invalid date format: " + pattern);
516 return -1;
517 }
518 }
519 return date.getTime();
520 }
521 /** get the string representation of a time from a long value(long type)
522 */
523 public static String getTime(long milliseconds) {
524 Date date = new Date(milliseconds);
525 SimpleDateFormat sdf = new SimpleDateFormat(granularity);
526 return sdf.format(date);
527 }
528 public static Element createResponseHeader(String verb) {
529 String tag_name = (oai_version.equals(OAI_VERSION2))? OAI_DASH_PMH : verb;
530 Element oai = response_doc.createElement(tag_name);
531 Element resp_date = response_doc.createElement(RESPONSE_DATE);
532 Element req = response_doc.createElement(REQUEST);
533 oai.appendChild(resp_date);
534 oai.appendChild(req);
535
536 if(oai_version.equals(OAI_VERSION2)) {
537 oai.setAttribute("xmlns", "http://www.openarchives.org/OAI/2.0/");
538 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
539 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0/ \n http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd");
540 } else {
541 oai.setAttribute("xmlns", "http://www.openarchives.com/OAI/1.1/OAI_" + verb);
542 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
543 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/1.1/OAI_" + verb + "\n http://www.openarchives.org/OAI/1.1/OAI_" + verb + ".xsd");
544 }
545 return oai;
546 }
547 public static Element getMetadataPrefixElement(String tag_name, String version) {
548 //examples of tag_name: dc, oai_dc:dc, etc.
549 Element oai = response_doc.createElement(tag_name);
550 if (version.equals(OAI_VERSION2)) {
551 oai.setAttribute("xmlns:oai_dc", "http://www.openarchives.org/OAI/2.0/oai_dc/");
552 oai.setAttribute("xmlns:dc", "http://purl.org/dc/elements/1.1/");
553 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
554 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0/oai_dc/ \n http://www.openarchives.org/OAI/2.0/oai_dc.xsd");
555 } else {
556 oai.setAttribute("xmlns", "ttp://www.openarchives.com/OAI/1.1/");
557 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
558 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/1.1/" + tag_name + ".xsd");
559 }
560
561 return oai;
562 }
563 public static HashMap getChildrenMapByTagName(Node n, String tag_name) {
564
565 HashMap map= new HashMap();
566 Node child = n.getFirstChild();
567 while (child!=null) {
568 String name = child.getNodeName();
569 if(name.equals(tag_name)) {
570 map.put(name, child);
571 }
572 child = child.getNextSibling();
573 }
574 return map;
575 }
576
577 /** Duplicates an element */
578 public static Element duplicateElement (Document owner, Element element, boolean with_attributes) {
579 return duplicateElementNS (owner, element, null, with_attributes);
580 }
581
582 /** Duplicates an element */
583 public static Element duplicateElementNS (Document owner,
584 Element element,
585 String namespace_uri,
586 boolean with_attributes) {
587 Element duplicate;
588 if (namespace_uri == null) {
589 duplicate = owner.createElement (element.getTagName ());
590 } else {
591 duplicate = owner.createElementNS (namespace_uri, element.getTagName ());
592 }
593 // Copy element attributes
594 if (with_attributes) {
595 NamedNodeMap attributes = element.getAttributes ();
596 for (int i = 0; i < attributes.getLength (); i++) {
597 Node attribute = attributes.item (i);
598 duplicate.setAttribute (attribute.getNodeName (), attribute.getNodeValue ());
599 }
600 }
601
602 // Copy element children
603 NodeList children = element.getChildNodes ();
604 for (int i = 0; i < children.getLength (); i++) {
605 Node child = children.item (i);
606 duplicate.appendChild (owner.importNode (child, true));
607 }
608
609 return duplicate;
610 }
611
612 public static void copyElement(Element to, Element from, String elem_name) {
613
614 Document to_doc = to.getOwnerDocument();
615 Node child = from.getFirstChild();
616 while (child != null) {
617 if (child.getNodeName().equals(elem_name)) {
618 to.appendChild(to_doc.importNode(child, true));
619 return;
620 }
621 child = child.getNextSibling();
622 }
623 }
624 public static HashMap getParamMap(NodeList params) {
625 HashMap map = new HashMap();
626 for(int i=0; i<params.getLength(); i++) {
627 Element param = (Element)params.item(i);
628 String param_name = param.getAttribute(OAIXML.NAME);
629 String param_value = param.getAttribute(OAIXML.VALUE);
630 map.put(param_name, param_value);
631 }
632 return map;
633 }
634 /** Parse an XML document from a given file */
635 static public Document parseXMLFile (File xml_file) {
636 // No file? No point trying!
637 if (xml_file.exists () == false) {
638 return null;
639 }
640 Document doc = null;
641 try {
642 doc = parseXML (new FileInputStream (xml_file));
643 }
644 catch (Exception exception) {
645 logger.error(exception.toString());
646 return null;
647 }
648 return doc;
649 }
650
651
652 /** Parse an XML document from a given input stream */
653 static public Document parseXML (InputStream xml_input_stream) {
654 Document document = null;
655
656 try {
657 InputStreamReader isr = new InputStreamReader (xml_input_stream, "UTF-8");
658 Reader xml_reader = new BufferedReader (isr);
659 document = parseXML (xml_reader);
660 isr.close ();
661 xml_input_stream.close ();
662 }
663 catch (Exception exception) {
664 logger.error(exception.toString());
665 }
666
667 return document;
668 }
669
670 /** Parse an XML document from a given reader */
671 static public Document parseXML (Reader xml_reader) {
672 Document document = null;
673
674 try {
675 InputSource isc = new InputSource (xml_reader);
676 DOMParser parser = new DOMParser ();
677 parser.setFeature ("http://xml.org/sax/features/validation", false);
678 parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
679 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
680 parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true);
681 parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
682 parser.parse (isc);
683 document = parser.getDocument ();
684 }
685 catch (SAXException exception) {
686 System.err.println ("SAX exception: " + exception.getMessage ());
687 logger.error(exception.toString());
688 }
689 catch (Exception exception) {
690 logger.error(exception.toString());
691 }
692
693 return document;
694 }
695 /** Write an XML document to a given file */
696 static public boolean writeXMLFile (File xml_file, Document document) {
697 try {
698 OutputStream os = new FileOutputStream (xml_file);
699 // Create an output format for our document.
700 OutputFormat f = new OutputFormat (document);
701 f.setEncoding ("UTF-8");
702 f.setIndenting (true);
703 f.setLineWidth (0); // Why isn't this working!
704 f.setPreserveSpace (false);
705 // Create the necessary writer stream for serialization.
706 OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8");
707 Writer w = new BufferedWriter (osw);
708 // Generate a new serializer from the above.
709 XMLSerializer s = new XMLSerializer (w, f);
710 s.asDOMSerializer ();
711 // Finally serialize the document to file.
712 s.serialize (document);
713 // And close.
714 os.close ();
715 return true;
716 }
717 catch (Exception exception) {
718 logger.error(exception.toString());
719 return false;
720 }
721 }
722
723
724}
725
726
727
728
729
730
Note: See TracBrowser for help on using the repository browser.