/********************************************************************** * * ApplyXSLT.java * * Copyright 2006-2010 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *********************************************************************/ package org.nzdl.gsdl; import java.io.*; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; import javax.xml.parsers.*; import javax.xml.transform.dom.*; import org.w3c.dom.*; /** * Use the TraX interface to perform a transformation in the simplest manner possible * (3 statements). */ public class ApplyXSLT { public static final String DOC_START = new String (""); public static final String DOC_END = new String (""); public static final String INPUT_END = new String (""); private static final String RECORD_ELEMENT = "record"; private static final String CONTROLFIELD_ELEMENT = "controlfield"; private static final String SUBFIELD_ELEMENT = "subfield"; private static final String LEADER_ELEMENT = "leader"; private final int BEFORE_READING = 0; private final int IS_READING = 1; private String xsl_file; private String mapping_file; private String sourcelang; private String targetlang; public ApplyXSLT(String sourcelang, String targetlang){ initLanguages(sourcelang, targetlang); } public ApplyXSLT(String xsl_file, String sourcelang, String targetlang) { this.xsl_file = xsl_file; initLanguages(sourcelang, targetlang); } public ApplyXSLT(String xsl_file, String sourcelang, String targetlang, String mapping_file) { this.xsl_file = xsl_file; this.mapping_file = mapping_file; initLanguages(sourcelang, targetlang); } private void initLanguages(String sourcelang, String targetlang) { this.sourcelang = sourcelang; this.targetlang = targetlang; // if only target language is provided, assume source language is English if(sourcelang.equals("") && !targetlang.equals("")) { this.sourcelang = "en"; } } private boolean process() { try{ // Use System InputStream to receive piped data from the perl program InputStreamReader ir = new InputStreamReader(System.in, "UTF8"); BufferedReader br = new BufferedReader(ir); int system_status = BEFORE_READING; StringBuffer a_doc = new StringBuffer(); String output_file = new String(); while (br.ready()) { String this_line = br.readLine(); if(system_status == BEFORE_READING){ if(this_line.compareTo(DOC_START) == 0){ output_file = br.readLine(); // read the next line as the output file name system_status = IS_READING; a_doc = new StringBuffer(); } else if(this_line.compareTo(INPUT_END) == 0){ return true; } else{ System.err.println("Undefined process status:" + this_line); system_status = BEFORE_READING; } } else if(system_status == IS_READING){ if(this_line.compareTo(DOC_END) == 0){ boolean result = false; if (mapping_file !=null && !mapping_file.equals("")){ result = translateXMLWithMapping(a_doc.toString(), output_file); } else{ result = translateXML(a_doc.toString(), output_file); } if (!result){ System.err.println("Translation Failed!!"); return false; } system_status = BEFORE_READING; } else{ a_doc.append(this_line + "\n"); } } else{ System.err.println ("Undefined system status in ApplyXSLT.java main()."); System.exit(-1); } } }catch (Exception e) { System.err.println("Receiving piped data error!" + e.toString()); } return false; } private boolean translateXML(String full_doc, String output_file) throws IOException,TransformerException, TransformerConfigurationException, FileNotFoundException { StringReader str = new StringReader(full_doc) ; TransformerFactory tFactory = TransformerFactory.newInstance(); Transformer transformer = tFactory.newTransformer(new StreamSource(xsl_file)); setTransformerLanguageParams(transformer); // sourcelang and targetlang transformer.transform(new StreamSource(str), new StreamResult(new FileOutputStream(output_file))); return true; } private boolean translateXMLWithMapping(String full_doc, String output_file) throws IOException,TransformerException, TransformerConfigurationException, FileNotFoundException { StringReader str = new StringReader(full_doc) ; try{ TransformerFactory tFactory = TransformerFactory.newInstance(); Transformer transformer = tFactory.newTransformer(new StreamSource(xsl_file)); Document mapping_doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(mapping_file); Element mapping =mapping_doc.getDocumentElement(); transformer.setParameter("mapping",mapping); setTransformerLanguageParams(transformer); // sourcelang and targetlang Document output_doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); transformer.transform(new StreamSource(str), new DOMResult(output_doc)); calculateRecordsLength(output_doc); transformer = tFactory.newTransformer(); transformer.transform(new DOMSource(output_doc), new StreamResult(new FileOutputStream(output_file))); } catch(Exception e){ e.printStackTrace(); return false; } return true; } private void calculateRecordsLength(Document output_doc){ NodeList records = output_doc.getDocumentElement().getElementsByTagName(RECORD_ELEMENT); for(int i=0;i0){ Element leader_element = (Element)leaders.item(0); removeFirstTextNode(leader_element); leader_element.insertBefore(leader_element.getOwnerDocument().createTextNode(record_length),leader_element.getFirstChild()); } } private void removeFirstTextNode(Element element){ //remove the first text node NodeList children_nodelist = element.getChildNodes(); for (int i = 0; i < children_nodelist.getLength(); i++) { Node child_node = children_nodelist.item(i); if (child_node.getNodeType() == Node.TEXT_NODE) { element.removeChild(child_node); return; } } } private String getElementTextValue(Element element) { String text =""; // Find the node child NodeList children_nodelist = element.getChildNodes(); for (int i = 0; i < children_nodelist.getLength(); i++) { Node child_node = children_nodelist.item(i); if (child_node.getNodeType() == Node.TEXT_NODE) { text +=child_node.getNodeValue(); } } return text; } private void setMappingVariable(Document style_doc){ Node child = style_doc.getDocumentElement().getFirstChild(); while(child != null) { String name = child.getNodeName(); if (name.equals("xsl:variable")) { Element variable_element = (Element)child; if ( variable_element.getAttribute("name").trim().equals("mapping")){ variable_element.setAttribute("select","document('"+mapping_file+"')/Mapping"); } } child = child.getNextSibling(); } } private void setTransformerLanguageParams(Transformer transformer) { if(targetlang != "") { transformer.setParameter("sourcelang",sourcelang); transformer.setParameter("targetlang",targetlang); } } private void translate(String xml_file, String xsl_file, String output_file)throws IOException,TransformerException, TransformerConfigurationException, FileNotFoundException, IOException{ TransformerFactory tFactory = TransformerFactory.newInstance(); Transformer transformer = tFactory.newTransformer(new StreamSource(xsl_file)); OutputStreamWriter output = null; if (output_file.equals("")) { output = new OutputStreamWriter(System.out, "UTF-8"); } else{ output = new OutputStreamWriter(new FileOutputStream(output_file), "UTF-8"); } setTransformerLanguageParams(transformer); // sourcelang and targetlang transformer.transform(new StreamSource(new File(xml_file)),new StreamResult(output)); } static public String replaceAll(String source_string, String match_regexp, String replace_string) { return source_string.replaceAll(match_regexp, replace_string); } // Necessary for paperspast.dm, but can be used generally. // The get-chunks cmd of gti.pl perl script when run over paperspast.dm returns XML with source and target lines // like: [c=paperspast] {All newspapers} for source and [c=paperspast,l=mi] {Niupepa katoa} for target // This function returns just the 'string' portion of the chunk of data: e.g 'All newspapers' and 'Niupepa katoa' static public String getChunkString(String target_file_text) { int startindex = target_file_text.indexOf("["); if(startindex != 0) { return target_file_text; } // to test that the input requires processing // else startindex = target_file_text.indexOf("{"); int endindex = target_file_text.lastIndexOf("}"); if(startindex != -1 && endindex != -1) { return target_file_text.substring(startindex+1, endindex); // skips { and } } else { return target_file_text; } } // Necessary for paperspast.dm, but can be used generally. // The get-chunks cmd of gti.pl perl script when run over paperspast.dm returns XML with source and target lines // like: [c=paperspast] {All newspapers} for source and [c=paperspast,l=mi] {Niupepa katoa} for target // This function returns just the 'attribute' portion of the chunk of data: e.g 'c=paperspast' and 'c=paperspast,l=mi' static public String getChunkAttr(String target_file_text) { int startindex = target_file_text.indexOf("["); if(startindex != 0) { return target_file_text; } // to test that the input requires processing // else startindex = target_file_text.indexOf("{"); int endindex = target_file_text.lastIndexOf("}"); if(startindex != -1 && endindex != -1) { endindex = target_file_text.lastIndexOf("]", startindex); // look for ] preceding the { if(endindex > 1) { //if(endindex != -1) { // so there's something to substring between [ and ] return target_file_text.substring(1, endindex).trim(); // skips [ and ] } } return target_file_text; } public static void main(String[] args) { String xml_file=""; String xsl_file=""; String mapping_file=""; String output_file=""; String sourcelang=""; String targetlang=""; // Checking Arguments if(args.length < 1) { printUsage(); } for (int i=0;i