source: greenstone3/trunk/src/java/org/greenstone/gsdl3/util/OAIXML.java@ 15322

Last change on this file since 15322 was 15322, checked in by kjdon, 16 years ago

added support for JDBM (or other) in place of GDBM: changed a comment

File size: 29.3 KB
Line 
1package org.greenstone.gsdl3.util;
2
3import org.w3c.dom.*;
4import java.io.*;
5import java.net.*;
6import java.util.*;
7import java.text.DateFormat;
8import java.text.SimpleDateFormat;
9import org.apache.xerces.parsers.*;
10import org.apache.xml.serialize.*;
11
12// SAX
13import org.xml.sax.XMLReader;
14import org.xml.sax.SAXException;
15import org.xml.sax.SAXParseException;
16import org.xml.sax.helpers.DefaultHandler;
17import org.xml.sax.InputSource;
18
19// JAXP
20import javax.xml.parsers.FactoryConfigurationError;
21import javax.xml.parsers.ParserConfigurationException;
22import javax.xml.parsers.SAXParser;
23import javax.xml.parsers.SAXParserFactory;
24
25// import file Logger.java
26import org.apache.log4j.*;
27
28/** these constants are used for the OAI service */
29public class OAIXML {
30
31 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.GSXML.class.getName());
32
33 // the leading keyword of oai protocol
34 public static final String VERB = "verb";
35
36 // six valid oai verbs
37 public static final String GET_RECORD = "GetRecord";
38 public static final String LIST_RECORDS = "ListRecords";
39 public static final String LIST_IDENTIFIERS = "ListIdentifiers";
40 public static final String LIST_SETS = "ListSets";
41 public static final String LIST_METADATA_FORMATS = "ListMetadataFormats";
42 public static final String IDENTIFY = "Identify";
43
44 // other valid oai parameters
45 public static final String OAI_METADATAFORMAT = "OAIMetadataFormat";
46 public static final String METADATA_NAMESPACE = "metadataNamespace";
47 public static final String OAI_DC = "oai_dc";
48 public static final String DC = "dc";
49 public static final String METADATA_PREFIX = "metadataPrefix";
50 public static final String FROM = "from";
51 public static final String UNTIL = "until";
52 public static final String SET = "set";
53 public static final String RESUMPTION_TOKEN = "resumptionToken";
54 public static final String RESUMPTION_TOKEN_EXPIRATION = "resumptionTokenExpiration";
55 public static final String IDENTIFIER = "identifier";
56
57 // words used to compose oai responses
58 public static final String ADMIN_EMAIL = "adminEmail";
59 public static final String BAD_ARGUMENT = "badArgument";
60 public static final String BAD_RESUMPTION_TOKEN = "badResumptionToken";
61 public static final String BAD_VERB = "badVerb";
62 public static final String BASE_URL = "baseURL";
63 public static final String CANNOT_DISSEMINATE_FORMAT = "cannotDisseminateFormat";
64 public static final String CODE = "code";
65 public static final String COLLECTION = "collection";
66 public static final String COLLECTION_LIST = "collectionList";
67 public static final String COMPLETE_LIST_SIZE = "completeListSize";
68 public static final String COMPRESSION = "compression";
69 public static final String CURSOR = "cursor";
70 public static final String DATESTAMP = "datestamp";
71 public static final String DC_METADATA_NAMES = "DCMetadataNames";
72 public static final String DELETED_RECORD = "deletedRecord";
73 public static final String DESCRIPTION = "description";
74 public static final String EARLIEST_DATESTAMP = "earliestDatestamp";
75 public static final String ERROR = "error";
76 public static final String EXPIRATION_DATE = "expirationDate";
77 public static final String GRANULARITY = "granularity";
78 public static final String GS3OAI = "GS3OAI";
79 public static final String HAS_OAI = "hasOAI";
80 public static final String HEADER = "header";
81 public static final String ILLEGAL_OAI_VERB = "Illegal OAI verb";
82 public static final String INDEX_STEM = "indexStem";
83 public static final String LASTMODIFIED = "lastmodified";
84 public static final String MAPPING = "mapping";
85 public static final String MAPPING_LIST = "mappingList";
86 public static final String MESSAGE = "message";
87 public static final String METADATA = "metadata";
88 public static final String METADATA_FORMAT = "metadataFormat";
89 public static final String NAME = "name";
90 public static final String NO_RECORDS_MATCH = "noRecordsMatch";
91 public static final String OAI = "OAI";
92 public static final String OAI_DASH_PMH = "OAI-PMH";
93 public static final String OAIPMH = "OAIPMH";
94 public static final String OAI_RESUMPTION_TOKENS = "OAIResumptionTokens";
95 public static final String OAI_SERVICE = "oaiService";
96 public static final String OAI_SET_LIST = "oaiSetList";
97 public static final String OAI_SERVICE_UNAVAILABLE = "OAI service unavailable";
98 public static final String OID = "OID";
99 public static final String PARAM = "param";
100 public static final String PARAM_LIST = "paramList";
101 public static final String PROTOCOL_VERSION = "protocolVersion";
102 public static final String RECORD = "record";
103 public static final String REQUEST = "request";
104 public static final String REPOSITORY_NAME = "repositoryName";
105 public static final String RESPONSE = "response";
106 public static final String RESPONSE_DATE = "responseDate";
107 public static final String RESUME_AFTER = "resumeAfter";
108 public static final String SCHEMA = "schema";
109 public static final String SERVICE = "service";
110 public static final String SERVICE_UNAVAILABLE = "service unavailable";
111 public static final String SET_SPEC = "setSpec";
112 public static final String SET_NAME = "setName";
113 public static final String SET_DESCRIPTION = "setDescription";
114 public static final String SITE = "site";
115 public static final String TO = "to";
116 public static final String TYPE = "type";
117 public static final String VALUE = "value";
118
119 //Two error and exception conditions for the verb 'ListMetadataFormats'
120 public static final String ID_DOES_NOT_EXIST = "idDoesNotExist";
121 public static final String NO_METADATA_FORMATS = "noMetadataFormats";
122
123 // The node id in the collection database, which contains all the OIDs in the database
124 public static final String BROWSELIST = "browselist";
125
126 //system-dependent file separator, maybe '/' or '\'
127 public static final String FILE_SEPARATOR = File.separator;
128 public static final String OAI_VERSION1 = "1.0";
129 public static final String OAI_VERSION2 = "2.0";
130 /*************************above are final values****************************/
131
132 public static Element resumption_token_elem = null;
133 //used when saving the token file
134 public static File resumption_token_file = null;
135 //public static ArrayList token_list = new ArrayList();
136
137 //initialized in getOAIConfigXML()
138 public static Element oai_config_elem = null;
139
140 //stores the date format "yyyy-MM-ddTHH:mm:ssZ"
141 public static String granularity = "";
142 //this value is overriden in getOAIConfigXML()
143 public static long token_expiration = 7200;
144
145 /** which version of oai that this oaiserver supports; default is 2.0
146 * initialized in getOAIConfigXML()
147 */
148 public static String oai_version = "2.0";
149
150 /**response owner document */
151 public static Document response_doc = new XMLConverter().newDOM();
152
153 public static String[] special_char = {"/", "?", "#", "=", "&", ":", ";", " ", "%", "+"};
154 public static String[] escape_sequence = {"%2F", "%3F", "%23", "%3D", "%26", "%3A", "%3B", "%20", "%25", "%2B"};
155// /** key=special character; value=escaped sequence */
156// public static HashMap encode_map = new HashMap();
157// /** key=escaped sequence; value=special character */
158// public static HashMap decode_map = new HashMap();
159
160 public static void init() {
161 resumption_token_elem = getOAIResumptionTokenXML();
162 }
163 public static String getOAIVersion() {
164 return oai_version;
165 }
166 public static Element createElement(String tag_name) {
167 return response_doc.createElement(tag_name);
168 }
169 /**Compose a response element used when OAIPMH service sending responses thru
170 * ServiceCluster and MessageRouter, as they automatically wrap a message element
171 * on this response element
172 */
173 public static Element getResponse(Element core_msg) {
174 Element res = createElement(RESPONSE);
175 res.appendChild(response_doc.importNode(core_msg, true));
176 return res;
177 }
178 /** Read in OAIResumptionToken.xml (residing web/WEB-INF/classes/) */
179 public static Element getOAIResumptionTokenXML() {
180
181 // The system environment variable $GSDL3HOME(ends ../web) does not contain the file separator
182 resumption_token_file = new File(GlobalProperties.getGSDL3Home() + FILE_SEPARATOR +
183 "WEB-INF" + FILE_SEPARATOR + "classes" +FILE_SEPARATOR + "OAIResumptionToken.xml");
184 if (resumption_token_file.exists()) {
185 Document token_doc = parseXMLFile(resumption_token_file);
186 if (token_doc != null) {
187 resumption_token_elem = token_doc.getDocumentElement();
188 } else {
189 logger.error("Fail to parse resumption token file OAIReceptionToken.xml.");
190 return null;
191 }
192 //remove all expired tokens
193 clearExpiredTokens();
194 return resumption_token_elem;
195 }
196 //if resumption_token_file does not exist
197 logger.info("resumption token file: "+ resumption_token_file.getPath()+" not found! create an empty one.");
198 resumption_token_elem = createElement(OAI_RESUMPTION_TOKENS);
199 saveOAIResumptionTokenXML(resumption_token_elem);
200 return resumption_token_elem;
201 }
202 public static void saveOAIResumptionTokenXML(Element token_elem) {
203 if(writeXMLFile(resumption_token_file, token_elem.getOwnerDocument()) == false) {
204 logger.error("Fail to save the resumption token file");
205 }
206 }
207 public static void clearExpiredTokens() {
208 boolean token_deleted = false;
209 NodeList tokens = GSXML.getChildrenByTagName(resumption_token_elem, RESUMPTION_TOKEN);
210 for (int i=0; i<tokens.getLength(); i++) {
211 Element token_elem = (Element)tokens.item(i);
212 String expire_str = token_elem.getAttribute(EXPIRATION_DATE);
213 long datestamp = getTime(expire_str);
214 if(datestamp < System.currentTimeMillis()) {
215 resumption_token_elem.removeChild(token_elem);
216 token_elem = null;
217 token_deleted = true;
218 }
219 }
220
221 if(token_deleted) {
222 saveOAIResumptionTokenXML(resumption_token_elem);
223 }
224 }
225 public static boolean containsToken(String token) {
226 NodeList tokens = GSXML.getChildrenByTagName(resumption_token_elem, OAIXML.RESUMPTION_TOKEN);
227 for (int i=0; i<tokens.getLength(); i++) {
228 if(token.equals(GSXML.getNodeText((Element)tokens.item(i)).trim() ))
229 return true;
230 }
231 return false;
232 }
233 public static void addToken(Element token) {
234 Document doc = resumption_token_elem.getOwnerDocument();
235 resumption_token_elem.appendChild(duplicateElement(doc, token, true));
236 saveOAIResumptionTokenXML(resumption_token_elem);
237 }
238 public static void addToken(String token) {
239 Element te = resumption_token_elem.getOwnerDocument().createElement(OAIXML.RESUMPTION_TOKEN);
240 //add expiration att
241 resumption_token_elem.appendChild(te);
242 saveOAIResumptionTokenXML(resumption_token_elem);
243 }
244 public static boolean removeToken(String token) {
245 NodeList tokens = GSXML.getChildrenByTagName(resumption_token_elem, OAIXML.RESUMPTION_TOKEN);
246 int num_tokens = tokens.getLength();
247 for (int i=0; i<num_tokens; i++) {
248 Element e = (Element)(tokens.item(i));
249 if(token.equals(GSXML.getNodeText(e))) {
250 resumption_token_elem.removeChild(e);
251 saveOAIResumptionTokenXML(resumption_token_elem);
252 return true;
253 }
254 }
255 return false;
256 }
257 /** Read in OAIConfig.xml (residing web/WEB-INF/classes/) and use it to configure the receptionist etc.
258 * the oai_version variable is also set in here.
259 * The init() method is also called in here. */
260 public static Element getOAIConfigXML() {
261 init();
262
263 // The system environment variable $GSDL3HOME(ends ../web) does not contain the file separator
264 File oai_config_file = new File(GlobalProperties.getGSDL3Home() + FILE_SEPARATOR +
265 "WEB-INF" + FILE_SEPARATOR + "classes" +FILE_SEPARATOR + "OAIConfig.xml");
266 if (!oai_config_file.exists()) {
267 logger.error(" oai config file: "+oai_config_file.getPath()+" not found!");
268 return null;
269 }
270 Document oai_config_doc = parseXMLFile(oai_config_file);
271 if (oai_config_doc != null) {
272 oai_config_elem = oai_config_doc.getDocumentElement();
273 } else {
274 logger.error("Fail to parse oai config file OAIConfig.xml.");
275 return null;
276 }
277
278 //initialize oai_version
279 Element protocol_version = (Element)GSXML.getChildByTagName(oai_config_elem, PROTOCOL_VERSION);
280 oai_version = GSXML.getNodeText(protocol_version).trim();
281
282 //initialize token_expiration
283 Element expiration = (Element)GSXML.getChildByTagName(oai_config_elem, RESUMPTION_TOKEN_EXPIRATION);
284 String expire_str = GSXML.getNodeText(expiration).trim();
285 if (expiration != null && !expire_str.equals("")) {
286 token_expiration = Long.parseLong(expire_str);
287 }
288
289 // read granularity from the config file
290 Element granu_elem = (Element)GSXML.getChildByTagName(oai_config_elem, GRANULARITY);
291 //initialize the granu_str which might be used by other methods (eg, getDate())
292 granularity = GSXML.getNodeText(granu_elem).trim();
293 //change "yyyy-MM-ddTHH:mm:ssZ" to "yyyy-MM-dd'T'HH:mm:ss'Z'"
294 granularity = granularity.replaceAll("T", "'T'");
295 granularity = granularity.replaceAll("Z", "'Z'");
296 granularity = granularity.replaceAll("YYYY", "yyyy").replaceAll("DD", "dd").replaceAll("hh", "HH");
297 return oai_config_elem;
298 }
299 public static String[] getGlobalMetadataMapping(String prefix) {
300 Element list_meta_formats = (Element)GSXML.getChildByTagName(oai_config_elem, LIST_METADATA_FORMATS);
301 if(list_meta_formats == null) {
302 return null;
303 }
304 Element metadata_format = GSXML.getNamedElement(list_meta_formats, METADATA_FORMAT, METADATA_PREFIX, prefix);
305 if(metadata_format == null) {
306 return null;
307 }
308 NodeList mappings = metadata_format.getElementsByTagName(MAPPING);
309 int size = mappings.getLength();
310 if (size == 0) {
311 logger.info("No metadata mappings are provided in OAIConfig.xml.");
312 return null;
313 }
314 String[] names = new String[size];
315 for (int i=0; i<size; i++) {
316 names[i] = GSXML.getNodeText((Element)mappings.item(i)).trim();
317 }
318 return names;
319 }
320 public static String[] getDublinCoreNames() {
321 // read the standard Dublin Core metadata names
322 //<DCmetadataNames>dc.Title,dc.Creator,dc.Subject,dc.Description,dc.Publisher,dc.Contributor,dc.Date,dc.Type,dc.Format,dc.Identifier,dc.Source,dc.Language,dc.Relation,dc.Coverage,dc.Rights</DCmetadataNames>
323 Element dc_metadata_names = (Element)GSXML.getChildByTagName(oai_config_elem, DC_METADATA_NAMES);
324 if(dc_metadata_names == null) {
325 logger.error("Dublin Core metadata names are not provided.");
326 return null;
327 }
328 String names = GSXML.getNodeText(dc_metadata_names).trim();
329 return names.split(",");
330// String[] str = {"dc.Title","dc.Creator","dc.Subject","dc.Description","dc.Publisher","dc.Contributor","dc.Date","dc.Type","dc.Format","dc.Identifier","dc.Source","dc.Language","dc.Relation","dc.Coverage","dc.Rights"};
331// return str;
332 }
333
334 public static long getTokenExpiration() {
335 return token_expiration*1000;
336 }
337 /** Read in collectionConfig.xml which contains the metadata format information and
338 * the metadata format mapping
339 */
340 public static Element getCollectionConfigXML(String site_name, String coll_name) {
341 init();
342
343 Element coll_config = null;
344
345 // The system environment variable $GSDL3HOME does not contain the file separator
346 File coll_config_file = new File(GlobalProperties.getGSDL3Home() + FILE_SEPARATOR +
347 "sites" + FILE_SEPARATOR + site_name + FILE_SEPARATOR + "collect" +FILE_SEPARATOR
348 + coll_name + FILE_SEPARATOR + "etc" + FILE_SEPARATOR + "collectionConfig.xml");
349 if (!coll_config_file.exists()) {
350 logger.error(" collection config file: "+coll_config_file.getPath()+" not found!");
351 return null;
352 }
353 Document coll_config_doc = parseXMLFile(coll_config_file);
354 if (coll_config_doc != null) {
355 coll_config = coll_config_doc.getDocumentElement();
356 } else {
357 logger.error("Fail to parse collectionConfig.xml of collection: " + coll_config_file.getPath());
358 return null;
359 }
360
361 return coll_config;
362 }
363 /** TODO: returns a basic response for appropriate oai version
364 *
365 */
366 public static Element createBasicResponse(String verb, String[] pairs) {
367
368 Element response = createResponseHeader(verb);
369
370 //set the responseDate and request elements accordingly
371 Element request_elem = (Element)GSXML.getChildByTagName(response, REQUEST);
372 if (verb.equals("")) {
373 request_elem.setAttribute(VERB, verb);
374 }
375 int num_pairs = (pairs==null)? 0 : pairs.length;
376 for (int i=num_pairs - 1; i>=0; i--) {
377 int index = pairs[i].indexOf("=");
378 if (index != -1) {
379 String[] strs = pairs[i].split("=");
380 if(strs != null && strs.length == 2) {
381 request_elem.setAttribute(strs[0], oaiDecode(strs[1]));
382 }
383 }
384 }//end of for()
385 Element base_url_elem = (Element)GSXML.getChildByTagName(oai_config_elem, BASE_URL);
386 String base_url = GSXML.getNodeText(base_url_elem);
387 GSXML.setNodeText(request_elem, base_url);
388
389 Node resp_date = GSXML.getChildByTagName(response, RESPONSE_DATE);
390 if (resp_date != null) {
391 GSXML.setNodeText((Element)resp_date, getCurrentUTCTime());
392 }
393
394 return response;
395 }
396 /** @param error_code the value of the code attribute
397 * @param error_text the node text of the error element
398 * @return an oai error element
399 * Used by receptionist
400 */
401 public static Element createErrorElement(String error_code, String error_text) {
402 Element error = createElement(ERROR);
403 error.setAttribute(CODE, error_code);
404 GSXML.setNodeText(error, error_text);
405 return error;
406 }
407
408 /** convert the escaped sequences (eg, '%3A') of those special characters back to their
409 * original form (eg, ':').
410 */
411 public static String oaiDecode(String escaped_str) {
412 logger.info("oaiDecode() " +escaped_str);
413 for (int i=0; i<special_char.length; i++) {
414 if (escaped_str.indexOf(escape_sequence[i]) != -1) {
415 escaped_str = escaped_str.replaceAll(escape_sequence[i], special_char[i]);
416 }
417 }
418 //escaped_str = escaped_str.replaceAll("%3A", ":");
419 return escaped_str;
420 }
421 /** convert those special characters (eg, ':') to their
422 * escaped sequences (eg, '%3A').
423 */
424 public static String oaiEncode(String original_str) {
425 logger.info("oaiEncode() " + original_str);
426 for (int i=0; i<special_char.length; i++) {
427 if (original_str.indexOf(special_char[i]) != -1) {
428 original_str = original_str.replaceAll(special_char[i], escape_sequence[i]);
429 }
430 }
431 //original_str = original_str.replaceAll(":", "%3A");
432 return original_str;
433 }
434 /** convert YYYY-MM_DDThh:mm:ssZ to yyyy-MM-ddTHH:mm:ssZ
435 */
436 public static String convertToJava(String oai_format) {
437 oai_format = oai_format.replaceAll("YYYY", "yyyy").replaceAll("DD", "dd").replaceAll("hh", "HH");
438 return oai_format;
439 }
440 /** convert yyyy-MM-ddTHH:mm:ssZ to YYYY-MM_DDThh:mm:ssZ
441 */
442 public static String convertToOAI(String java_format) {
443 java_format = java_format.replaceAll("yyyy", "YYYY").replaceAll("dd", "DD").replaceAll("HH", "hh");
444 return java_format;
445 }
446 public static String getCurrentUTCTime() {
447 Date current_utc = new Date(System.currentTimeMillis());
448 //granularity is in the form: yyyy-MM-dd'T'HH:mm:ss'Z '
449 DateFormat formatter = new SimpleDateFormat(granularity);
450 return formatter.format(current_utc);
451 }
452 /** get a Date object from a Date format pattern string
453 *
454 * @param pattern - in the form: 2007-06-14T16:48:25Z, for example.
455 * @return a Date object - null if the pattern is not in the specified form
456 */
457
458 public static Date getDate(String pattern) {
459 if (pattern == null || pattern.equals("")) {
460 return null;
461 }
462 Date date = null;
463// String str = pattern.replaceAll("T", " ");
464// str = str.replaceAll("Z", "");
465 SimpleDateFormat sdf = null;
466 try {
467 sdf = new SimpleDateFormat(granularity);
468 date = sdf.parse(pattern);
469 } catch(Exception e) {
470 logger.error("invalid date format: " + pattern);
471 return null;
472 }
473 return date;
474 }
475 /** get the million second value from a string representing time in a pattern
476 * (eg, 2007-06-14T16:48:25Z)
477 */
478 public static long getTime(String pattern) {
479 if (pattern == null || pattern.equals("")) {
480 return -1;
481 }
482 Date date = null;
483 SimpleDateFormat sdf = null;
484 try {
485 //granularity is a global variable in the form: yyyy-MM-ddTHH:mm:ssZ
486 sdf = new SimpleDateFormat(granularity);
487 date = sdf.parse(pattern);
488 } catch(Exception e) {
489 logger.error("invalid date format: " + pattern);
490 return -1;
491 }
492 return date.getTime();
493 }
494 /** get the string representation of a time from a long value(long type)
495 */
496 public static String getTime(long seconds) {
497 Date date = new Date(seconds);
498 SimpleDateFormat sdf = new SimpleDateFormat(granularity);
499 return sdf.format(date);
500 }
501 public static Element createResponseHeader(String verb) {
502 String tag_name = (oai_version.equals(OAI_VERSION2))? OAI_DASH_PMH : verb;
503 Element oai = response_doc.createElement(tag_name);
504 Element resp_date = response_doc.createElement(RESPONSE_DATE);
505 Element req = response_doc.createElement(REQUEST);
506 oai.appendChild(resp_date);
507 oai.appendChild(req);
508
509 if(oai_version.equals(OAI_VERSION2)) {
510 oai.setAttribute("xmlns", "http://www.openarchives.org/OAI/2.0/");
511 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
512 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0 \n http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd");
513 } else {
514 oai.setAttribute("xmlns", "http://www.openarchives.com/OAI/1.1/OAI_" + verb);
515 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
516 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/1.1/OAI_" + verb + "\n http://www.openarchives.org/OAI/1.1/OAI_" + verb + ".xsd");
517 }
518 return oai;
519 }
520 public static Element getMetadataPrefixElement(String tag_name, String version) {
521 //examples of tag_name: dc, oai_dc:dc, etc.
522 Element oai = response_doc.createElement(tag_name);
523 if (version.equals(OAI_VERSION2)) {
524 oai.setAttribute("xmlns", "http://www.openarchives.org/OAI/2.0/");
525 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
526 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0 \n http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd");
527 } else {
528 oai.setAttribute("xmlns", "ttp://www.openarchives.com/OAI/1.1/");
529 oai.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
530 oai.setAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/1.1/" + tag_name + ".xsd");
531 }
532
533 return oai;
534 }
535 public static HashMap getChildrenMapByTagName(Node n, String tag_name) {
536
537 HashMap map= new HashMap();
538 Node child = n.getFirstChild();
539 while (child!=null) {
540 String name = child.getNodeName();
541 if(name.equals(tag_name)) {
542 map.put(name, child);
543 }
544 child = child.getNextSibling();
545 }
546 return map;
547 }
548
549 /** Duplicates an element */
550 public static Element duplicateElement (Document owner, Element element, boolean with_attributes) {
551 return duplicateElementNS (owner, element, null, with_attributes);
552 }
553
554 /** Duplicates an element */
555 public static Element duplicateElementNS (Document owner,
556 Element element,
557 String namespace_uri,
558 boolean with_attributes) {
559 Element duplicate;
560 if (namespace_uri == null) {
561 duplicate = owner.createElement (element.getTagName ());
562 } else {
563 duplicate = owner.createElementNS (namespace_uri, element.getTagName ());
564 }
565 // Copy element attributes
566 if (with_attributes) {
567 NamedNodeMap attributes = element.getAttributes ();
568 for (int i = 0; i < attributes.getLength (); i++) {
569 Node attribute = attributes.item (i);
570 duplicate.setAttribute (attribute.getNodeName (), attribute.getNodeValue ());
571 }
572 }
573
574 // Copy element children
575 NodeList children = element.getChildNodes ();
576 for (int i = 0; i < children.getLength (); i++) {
577 Node child = children.item (i);
578 duplicate.appendChild (owner.importNode (child, true));
579 }
580
581 return duplicate;
582 }
583
584 public static void copyElement(Element to, Element from, String elem_name) {
585
586 Document to_doc = to.getOwnerDocument();
587 Node child = from.getFirstChild();
588 while (child != null) {
589 if (child.getNodeName().equals(elem_name)) {
590 to.appendChild(to_doc.importNode(child, true));
591 return;
592 }
593 child = child.getNextSibling();
594 }
595 }
596 public static HashMap getParamMap(NodeList params) {
597 HashMap map = new HashMap();
598 for(int i=0; i<params.getLength(); i++) {
599 Element param = (Element)params.item(i);
600 String param_name = param.getAttribute(OAIXML.NAME);
601 String param_value = param.getAttribute(OAIXML.VALUE);
602 map.put(param_name, param_value);
603 }
604 return map;
605 }
606 /** Parse an XML document from a given file */
607 static public Document parseXMLFile (File xml_file) {
608 // No file? No point trying!
609 if (xml_file.exists () == false) {
610 return null;
611 }
612 Document doc = null;
613 try {
614 doc = parseXML (new FileInputStream (xml_file));
615 }
616 catch (Exception exception) {
617 logger.error(exception.toString());
618 return null;
619 }
620 return doc;
621 }
622
623
624 /** Parse an XML document from a given input stream */
625 static public Document parseXML (InputStream xml_input_stream) {
626 Document document = null;
627
628 try {
629 InputStreamReader isr = new InputStreamReader (xml_input_stream, "UTF-8");
630 Reader xml_reader = new BufferedReader (isr);
631 document = parseXML (xml_reader);
632 isr.close ();
633 xml_input_stream.close ();
634 }
635 catch (Exception exception) {
636 logger.error(exception.toString());
637 }
638
639 return document;
640 }
641
642 /** Parse an XML document from a given reader */
643 static public Document parseXML (Reader xml_reader) {
644 Document document = null;
645
646 try {
647 InputSource isc = new InputSource (xml_reader);
648 DOMParser parser = new DOMParser ();
649 parser.setFeature ("http://xml.org/sax/features/validation", false);
650 parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
651 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
652 parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true);
653 parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
654 parser.parse (isc);
655 document = parser.getDocument ();
656 }
657 catch (SAXException exception) {
658 System.err.println ("SAX exception: " + exception.getMessage ());
659 logger.error(exception.toString());
660 }
661 catch (Exception exception) {
662 logger.error(exception.toString());
663 }
664
665 return document;
666 }
667 /** Write an XML document to a given file */
668 static public boolean writeXMLFile (File xml_file, Document document) {
669 try {
670 OutputStream os = new FileOutputStream (xml_file);
671 // Create an output format for our document.
672 OutputFormat f = new OutputFormat (document);
673 f.setEncoding ("UTF-8");
674 f.setIndenting (true);
675 f.setLineWidth (0); // Why isn't this working!
676 f.setPreserveSpace (false);
677 // Create the necessary writer stream for serialization.
678 OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8");
679 Writer w = new BufferedWriter (osw);
680 // Generate a new serializer from the above.
681 XMLSerializer s = new XMLSerializer (w, f);
682 s.asDOMSerializer ();
683 // Finally serialize the document to file.
684 s.serialize (document);
685 // And close.
686 os.close ();
687 return true;
688 }
689 catch (Exception exception) {
690 logger.error(exception.toString());
691 return false;
692 }
693 }
694
695
696}
697
698
699
700
701
702
Note: See TracBrowser for help on using the repository browser.