source: main/trunk/greenstone2/build-src/src/java/org/nzdl/gsdl/ApplyXSLT.java@ 25241

Last change on this file since 25241 was 25241, checked in by ak19, 12 years ago

Updated ApplyXSLT to work with the 2 new XSLT files gti-generate-tmx-xml and gti-tmx-to-txt which require an additional parameter (the targetlanguage) and further take an optional parameter (source language).

  • Property svn:keywords set to Author Date Id Revision
File size: 13.6 KB
Line 
1/**********************************************************************
2 *
3 * ApplyXSLT.java
4 *
5 * Copyright 2006-2010 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27
28package org.nzdl.gsdl;
29
30import java.io.*;
31
32import javax.xml.transform.Transformer;
33import javax.xml.transform.TransformerConfigurationException;
34import javax.xml.transform.TransformerException;
35import javax.xml.transform.TransformerFactory;
36import javax.xml.transform.stream.StreamResult;
37import javax.xml.transform.stream.StreamSource;
38
39import javax.xml.parsers.*;
40import javax.xml.transform.dom.*;
41import org.w3c.dom.*;
42
43
44
/**
 * Command-line driver that applies an XSLT stylesheet to XML using the
 * JAXP (TrAX) transformation API.
 *
 * Two modes are supported (see {@link #main(String[])}):
 * <ul>
 *  <li>File mode: both <code>-x</code> (xml) and <code>-t</code> (xsl) are
 *      given; the file is transformed to <code>-o</code>, or to standard
 *      output when <code>-o</code> is absent.</li>
 *  <li>Pipe mode: only <code>-t</code> is given; documents are read from
 *      standard input, each framed by the {@link #DOC_START} and
 *      {@link #DOC_END} marker lines, and the stream is terminated by
 *      {@link #INPUT_END}.</li>
 * </ul>
 * When a mapping file (<code>-m</code>) is supplied in pipe mode, the parsed
 * mapping document is handed to the stylesheet as the "mapping" parameter
 * and the leader of every output record is rewritten to start with a
 * five-character record length.
 */
public class ApplyXSLT
{
    /** Marker line opening a piped document; the following line names the output file. */
    public static final String DOC_START = "<?DocStart?>";
    /** Marker line closing a piped document and triggering its transformation. */
    public static final String DOC_END = "<?DocEnd?>";
    /** Marker line signalling that no more piped documents will follow. */
    public static final String INPUT_END = "<?Done?>";

    private static final String RECORD_ELEMENT = "record";
    private static final String CONTROLFIELD_ELEMENT = "controlfield";
    private static final String SUBFIELD_ELEMENT = "subfield";
    private static final String LEADER_ELEMENT = "leader";

    /** Width the record length is zero-padded to before being written into the leader. */
    private static final int RECORD_LENGTH_WIDTH = 5;

    // States of the pipe reader in process().
    private static final int BEFORE_READING = 0;
    private static final int IS_READING = 1;

    private String xsl_file;
    private String mapping_file;

    private String sourcelang;
    private String targetlang;

    /**
     * Creates an instance with no stylesheet bound; used for file mode,
     * where the stylesheet is passed to the translate call directly.
     *
     * @param sourcelang source language code, may be empty
     * @param targetlang target language code, may be empty
     */
    public ApplyXSLT(String sourcelang, String targetlang)
    {
        initLanguages(sourcelang, targetlang);
    }

    /**
     * Creates an instance for pipe mode without a mapping file.
     *
     * @param xsl_file   stylesheet location
     * @param sourcelang source language code, may be empty
     * @param targetlang target language code, may be empty
     */
    public ApplyXSLT(String xsl_file, String sourcelang, String targetlang)
    {
        this.xsl_file = xsl_file;
        initLanguages(sourcelang, targetlang);
    }

    /**
     * Creates an instance for pipe mode with a mapping file.
     * Note the parameter order: the mapping file comes LAST.
     *
     * @param xsl_file     stylesheet location
     * @param sourcelang   source language code, may be empty
     * @param targetlang   target language code, may be empty
     * @param mapping_file mapping document location
     */
    public ApplyXSLT(String xsl_file, String sourcelang, String targetlang, String mapping_file)
    {
        this.xsl_file = xsl_file;
        this.mapping_file = mapping_file;
        initLanguages(sourcelang, targetlang);
    }

    /**
     * Stores the language codes, treating null as "not given". If only the
     * target language is provided, the source language defaults to English.
     */
    private void initLanguages(String sourcelang, String targetlang)
    {
        this.sourcelang = (sourcelang == null) ? "" : sourcelang;
        this.targetlang = (targetlang == null) ? "" : targetlang;

        if (this.sourcelang.equals("") && !this.targetlang.equals("")) {
            this.sourcelang = "en";
        }
    }

    /**
     * Reads framed documents from standard input (UTF-8) and transforms each
     * one as soon as its {@link #DOC_END} marker arrives.
     *
     * @return true when the {@link #INPUT_END} marker was seen; false when a
     *         transformation failed, an exception occurred, or the input
     *         ended without the marker
     */
    private boolean process()
    {
        try {
            // Use System InputStream to receive piped data from the perl program
            BufferedReader br = new BufferedReader(new InputStreamReader(System.in, "UTF8"));

            int system_status = BEFORE_READING;
            StringBuffer a_doc = new StringBuffer();
            String output_file = "";

            // Loop until end of stream. BufferedReader.ready() must not be used
            // as the loop condition: it only reports whether a read would block,
            // so a slow writer on the other end of the pipe would end the loop early.
            String this_line;
            while ((this_line = br.readLine()) != null) {
                if (system_status == BEFORE_READING) {
                    if (this_line.equals(DOC_START)) {
                        // the next line is the output file name
                        output_file = br.readLine();
                        if (output_file == null) {
                            System.err.println("Unexpected end of input after " + DOC_START);
                            return false;
                        }
                        a_doc = new StringBuffer();
                        system_status = IS_READING;
                    }
                    else if (this_line.equals(INPUT_END)) {
                        return true;
                    }
                    else {
                        System.err.println("Undefined process status:" + this_line);
                    }
                }
                else { // IS_READING
                    if (this_line.equals(DOC_END)) {
                        boolean result;
                        if (mapping_file != null && !mapping_file.equals("")) {
                            result = translateXMLWithMapping(a_doc.toString(), output_file);
                        }
                        else {
                            result = translateXML(a_doc.toString(), output_file);
                        }

                        if (!result) {
                            System.err.println("Translation Failed!!");
                            return false;
                        }
                        system_status = BEFORE_READING;
                    }
                    else {
                        a_doc.append(this_line).append("\n");
                    }
                }
            }
        }
        catch (Exception e) {
            System.err.println("Receiving piped data error!" + e.toString());
        }

        return false;
    }

    /**
     * Transforms one in-memory document with the configured stylesheet and
     * writes the result to the given file.
     *
     * @param full_doc    the XML document text
     * @param output_file path of the file to write
     * @return always true; failures are reported through exceptions
     */
    private boolean translateXML(String full_doc, String output_file)
        throws IOException, TransformerException, TransformerConfigurationException, FileNotFoundException
    {
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer = tFactory.newTransformer(new StreamSource(xsl_file));

        setTransformerLanguageParams(transformer); // sourcelang and targetlang

        // The transformer does not close a stream handed to it, so close it here.
        OutputStream out = new FileOutputStream(output_file);
        try {
            transformer.transform(new StreamSource(new StringReader(full_doc)), new StreamResult(out));
        }
        finally {
            out.close();
        }
        return true;
    }

    /**
     * Transforms one in-memory document with the configured stylesheet,
     * passing the parsed mapping document's root element as the "mapping"
     * stylesheet parameter. The result is built as a DOM so record lengths
     * can be filled into the leaders before it is serialised to the file.
     *
     * @param full_doc    the XML document text
     * @param output_file path of the file to write
     * @return true on success; false if anything went wrong (the stack trace
     *         is printed)
     */
    private boolean translateXMLWithMapping(String full_doc, String output_file)
        throws IOException, TransformerException, TransformerConfigurationException, FileNotFoundException
    {
        try {
            TransformerFactory tFactory = TransformerFactory.newInstance();
            Transformer transformer = tFactory.newTransformer(new StreamSource(xsl_file));

            Document mapping_doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(mapping_file);
            Element mapping = mapping_doc.getDocumentElement();

            transformer.setParameter("mapping", mapping);
            setTransformerLanguageParams(transformer); // sourcelang and targetlang

            Document output_doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            transformer.transform(new StreamSource(new StringReader(full_doc)), new DOMResult(output_doc));

            calculateRecordsLength(output_doc);

            // A transformer created without a stylesheet copies its input unchanged;
            // it is used here to serialise the DOM.
            Transformer serializer = tFactory.newTransformer();
            OutputStream out = new FileOutputStream(output_file);
            try {
                serializer.transform(new DOMSource(output_doc), new StreamResult(out));
            }
            finally {
                out.close();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            return false;
        }

        return true;
    }

    /** Fills in the record length for every "record" element below the document root. */
    private void calculateRecordsLength(Document output_doc)
    {
        NodeList records = output_doc.getDocumentElement().getElementsByTagName(RECORD_ELEMENT);

        for (int i = 0; i < records.getLength(); i++) {
            calculateRecordLength((Element) records.item(i));
        }
    }

    /**
     * Sums the text lengths of all controlfield and subfield descendants of
     * the record, zero-pads the sum to five characters, and replaces the
     * first text node of the record's first leader element with it.
     */
    private void calculateRecordLength(Element record)
    {
        int total_length = 0;

        NodeList controlfields = record.getElementsByTagName(CONTROLFIELD_ELEMENT);
        for (int i = 0; i < controlfields.getLength(); i++) {
            total_length += getElementTextValue((Element) controlfields.item(i)).length();
        }

        NodeList subfields = record.getElementsByTagName(SUBFIELD_ELEMENT);
        for (int i = 0; i < subfields.getLength(); i++) {
            total_length += getElementTextValue((Element) subfields.item(i)).length();
        }

        // record length needs to be five characters long; longer values are left as they are
        StringBuffer record_length = new StringBuffer(String.valueOf(total_length));
        while (record_length.length() < RECORD_LENGTH_WIDTH) {
            record_length.insert(0, '0');
        }

        NodeList leaders = record.getElementsByTagName(LEADER_ELEMENT);

        // only one leader element is expected; any further ones are ignored
        if (leaders.getLength() > 0) {
            Element leader_element = (Element) leaders.item(0);
            removeFirstTextNode(leader_element);
            leader_element.insertBefore(
                leader_element.getOwnerDocument().createTextNode(record_length.toString()),
                leader_element.getFirstChild());
        }
    }

    /** Removes the first direct child of the element that is a text node, if there is one. */
    private void removeFirstTextNode(Element element)
    {
        NodeList children_nodelist = element.getChildNodes();
        for (int i = 0; i < children_nodelist.getLength(); i++) {
            Node child_node = children_nodelist.item(i);
            if (child_node.getNodeType() == Node.TEXT_NODE) {
                element.removeChild(child_node);
                return;
            }
        }
    }

    /** Returns the concatenated values of the element's direct text-node children. */
    private String getElementTextValue(Element element)
    {
        StringBuffer text = new StringBuffer();

        NodeList children_nodelist = element.getChildNodes();
        for (int i = 0; i < children_nodelist.getLength(); i++) {
            Node child_node = children_nodelist.item(i);
            if (child_node.getNodeType() == Node.TEXT_NODE) {
                text.append(child_node.getNodeValue());
            }
        }

        return text.toString();
    }

    /**
     * Points every top-level xsl:variable named "mapping" in the stylesheet
     * document at the mapping file via a document() select expression.
     * Not called from anywhere in this class at present.
     */
    private void setMappingVariable(Document style_doc)
    {
        Node child = style_doc.getDocumentElement().getFirstChild();
        while (child != null) {
            if (child.getNodeName().equals("xsl:variable")) {
                Element variable_element = (Element) child;
                if (variable_element.getAttribute("name").trim().equals("mapping")) {
                    variable_element.setAttribute("select", "document('" + mapping_file + "')/Mapping");
                }
            }
            child = child.getNextSibling();
        }
    }

    /**
     * Passes the "sourcelang" and "targetlang" stylesheet parameters, but
     * only when a target language has been given.
     */
    private void setTransformerLanguageParams(Transformer transformer)
    {
        // compare by value: == / != on Strings tests object identity
        if (targetlang != null && !targetlang.equals("")) {
            transformer.setParameter("sourcelang", sourcelang);
            transformer.setParameter("targetlang", targetlang);
        }
    }

    /**
     * Transforms an XML file with the given stylesheet, writing UTF-8 output.
     *
     * @param xml_file    path of the XML input file
     * @param xsl_file    stylesheet location
     * @param output_file path of the file to write, or "" for standard output
     */
    private void translate(String xml_file, String xsl_file, String output_file)
        throws IOException, TransformerException, TransformerConfigurationException, FileNotFoundException
    {
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer = tFactory.newTransformer(new StreamSource(xsl_file));
        setTransformerLanguageParams(transformer); // sourcelang and targetlang

        boolean to_stdout = output_file.equals("");
        OutputStream raw;
        if (to_stdout) {
            raw = System.out;
        }
        else {
            raw = new FileOutputStream(output_file);
        }

        OutputStreamWriter output = new OutputStreamWriter(raw, "UTF-8");
        try {
            transformer.transform(new StreamSource(new File(xml_file)), new StreamResult(output));
        }
        finally {
            if (to_stdout) {
                // System.out stays open for the rest of the program; just push the bytes out
                output.flush();
            }
            else {
                output.close();
            }
        }
    }

    /**
     * Thin wrapper around {@link String#replaceAll(String, String)}.
     *
     * @param source_string  the string to search
     * @param match_regexp   regular expression to match
     * @param replace_string replacement (regex replacement syntax applies)
     * @return the string with every match replaced
     */
    static public String replaceAll(String source_string, String match_regexp, String replace_string)
    {
        return source_string.replaceAll(match_regexp, replace_string);
    }

    /** True when args[i] is followed by a value that does not itself look like an option. */
    private static boolean hasValue(String[] args, int i)
    {
        return i + 1 < args.length && !args[i + 1].startsWith("-");
    }

    /**
     * Entry point. Parses the options described by the usage message and runs
     * in pipe mode (stylesheet only) or file mode (xml file and stylesheet).
     * Any unrecognised option, or an option missing its value, prints the
     * usage message and exits.
     */
    public static void main(String[] args)
    {
        String xml_file = "";
        String xsl_file = "";
        String mapping_file = "";
        String output_file = "";

        String sourcelang = "";
        String targetlang = "";

        // Checking Arguments
        if (args.length < 1) {
            printUsage();
        }

        for (int i = 0; i < args.length; i++) {
            if (args[i].equals("-m") && hasValue(args, i)) {
                mapping_file = args[++i];
                checkFile(mapping_file.replaceAll("file:///", ""));
            }
            else if (args[i].equals("-x") && hasValue(args, i)) {
                xml_file = args[++i];
                checkFile(xml_file.replaceAll("file:///", ""));
            }
            else if (args[i].equals("-t") && hasValue(args, i)) {
                xsl_file = args[++i];
                checkFile(xsl_file.replaceAll("file:///", ""));
            }
            else if (args[i].equals("-o") && hasValue(args, i)) {
                output_file = args[++i];
            }
            // The two language parameters (-s and -l) are for the gti-generate-tmx-xml file
            // which requires the target lang (code), and will accept the optional source lang (code)
            else if (args[i].equals("-s") && hasValue(args, i)) {
                sourcelang = args[++i];
            }
            else if (args[i].equals("-l") && hasValue(args, i)) {
                targetlang = args[++i];
            }
            else {
                // covers -h, unknown options and options missing their value
                printUsage();
            }
        }

        if (xsl_file.equals("")) {
            printUsage();
        }
        else if (xml_file.equals("")) { // read from pipe line
            ApplyXSLT core;
            if (mapping_file.equals("")) {
                core = new ApplyXSLT(xsl_file, sourcelang, targetlang);
            }
            else {
                // the mapping file is the LAST constructor argument
                core = new ApplyXSLT(xsl_file, sourcelang, targetlang, mapping_file);
            }
            core.process();
        }
        else {
            ApplyXSLT core = new ApplyXSLT(sourcelang, targetlang);
            try {
                core.translate(xml_file, xsl_file, output_file);
            }
            catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /** Exits the program with status -1 if the named file does not exist. */
    private static void checkFile(String filename)
    {
        File file = new File(filename);
        if (!file.exists()) {
            System.out.println("Error: "+filename+" doesn't exist!");
            System.exit(-1);
        }
    }

    /** Prints the command-line usage and exits the program with status -1. */
    private static void printUsage()
    {
        System.out.println("Usage: ApplyXSLT -x File -t File [-m File] [-o File] [-s sourcelang] [-l targetlang]");
        System.out.println("\t-x specifies the xml file (Note: optional for piped xml data)");
        System.out.println("\t-t specifies the xsl file");
        System.out.println("\t-m specifies the mapping file (for MARCXMLPlugout.pm only)");
        System.out.println("\t-o specifies the output file name (output to screen if this option is absent)");
        System.out.println("\t-s specifies the input language code for generating TMX file. Defaults to 'en' if none is provided");
        System.out.println("\t-l specifies the output language code. Required if generating a TMX file.");
        System.exit(-1);
    }
}
440
441
Note: See TracBrowser for help on using the repository browser.