source: main/trunk/greenstone2/build-src/src/java/org/nzdl/gsdl/ApplyXSLT.java@ 28384

Last change on this file since 28384 was 28384, checked in by ak19, 11 years ago
  1. Fix: 3rd constructor had its arguments in the wrong order. 2. Added support for custom cmd-line parameters that are to be passed on into the XSLT. This is necessary to allow the xsd-to-mds.xsl XSLT that Jenny recently added to be more general.
  • Property svn:keywords set to Author Date Id Revision
File size: 17.2 KB
Line 
1/**********************************************************************
2 *
3 * ApplyXSLT.java
4 *
5 * Copyright 2006-2010 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27
28package org.nzdl.gsdl;
29
30import java.io.*;
31import java.util.HashMap;
32import java.util.Iterator;
33import java.util.Map;
34import java.util.Map.Entry;
35
36import javax.xml.transform.Transformer;
37import javax.xml.transform.TransformerConfigurationException;
38import javax.xml.transform.TransformerException;
39import javax.xml.transform.TransformerFactory;
40import javax.xml.transform.stream.StreamResult;
41import javax.xml.transform.stream.StreamSource;
42
43import javax.xml.parsers.*;
44import javax.xml.transform.dom.*;
45import org.w3c.dom.*;
46
47
48
/**
 * Applies an XSLT stylesheet to XML input using the JAXP (TrAX) transformation API.
 *
 * <p>The class can be driven in two ways from {@link #main(String[])}:</p>
 * <ul>
 *   <li><b>File mode</b> ({@code -x} and {@code -t} given): a single XML file is transformed
 *       and the result is written to the {@code -o} file, or to standard output if no
 *       output file is given.</li>
 *   <li><b>Pipe mode</b> (only {@code -t} given): documents are read from standard input.
 *       Each document is announced by a {@link #DOC_START} line, followed by a line holding
 *       the output file name, followed by the document's lines, and is closed by a
 *       {@link #DOC_END} line. A {@link #INPUT_END} line ends the session.</li>
 * </ul>
 *
 * <p>When a mapping file is supplied ({@code -m}), the mapping document's root element is
 * passed to the stylesheet as the {@code mapping} parameter and the length stored in the
 * {@code leader} of every {@code record} element of the result is recalculated before the
 * result is written out.</p>
 *
 * <p>The static helpers {@link #replaceAll}, {@link #getChunkString} and
 * {@link #getChunkAttr} are plain string utilities.</p>
 */
public class ApplyXSLT
{

    /** Line that announces the start of a document in pipe mode. */
    public static final String DOC_START = "<?DocStart?>";
    /** Line that closes a document in pipe mode. */
    public static final String DOC_END = "<?DocEnd?>";
    /** Line that ends the whole piped session. */
    public static final String INPUT_END = "<?Done?>";

    private static final String RECORD_ELEMENT = "record";
    private static final String CONTROLFIELD_ELEMENT = "controlfield";
    private static final String SUBFIELD_ELEMENT = "subfield";
    private static final String LEADER_ELEMENT = "leader";

    /** Prefix that may be put in front of file names given on the command line. */
    private static final String FILE_URL_PREFIX = "file:///";

    /** The record length written into a leader is zero-padded to this many characters. */
    private static final int RECORD_LENGTH_DIGITS = 5;

    /** Pipe-mode state: waiting for a {@link #DOC_START} or {@link #INPUT_END} line. */
    private static final int BEFORE_READING = 0;
    /** Pipe-mode state: collecting the lines of a document until {@link #DOC_END}. */
    private static final int IS_READING = 1;

    private String xsl_file;
    private String mapping_file;

    private String sourcelang;
    private String targetlang;
    private HashMap paramMap;

    /**
     * Creates an instance without a stylesheet; used for file mode, where the stylesheet
     * is handed to the translate method directly.
     *
     * @param sourcelang source language code, may be empty or null
     * @param targetlang target language code, may be empty or null
     * @param param_map  custom name/value parameters to pass into the XSLT, may be null
     */
    public ApplyXSLT(String sourcelang, String targetlang, HashMap param_map)
    {
        initParams(sourcelang, targetlang, param_map);
    }

    /**
     * Creates an instance for pipe mode without a mapping file.
     *
     * @param xsl_file   the stylesheet to apply
     * @param sourcelang source language code, may be empty or null
     * @param targetlang target language code, may be empty or null
     * @param param_map  custom name/value parameters to pass into the XSLT, may be null
     */
    public ApplyXSLT(String xsl_file, String sourcelang, String targetlang, HashMap param_map)
    {
        this.xsl_file = xsl_file;
        initParams(sourcelang, targetlang, param_map);
    }

    /**
     * Creates an instance for pipe mode with a mapping file.
     *
     * @param xsl_file     the stylesheet to apply
     * @param mapping_file the mapping document handed to the stylesheet as the
     *                     <code>mapping</code> parameter
     * @param sourcelang   source language code, may be empty or null
     * @param targetlang   target language code, may be empty or null
     * @param param_map    custom name/value parameters to pass into the XSLT, may be null
     */
    public ApplyXSLT(String xsl_file, String mapping_file, String sourcelang, String targetlang, HashMap param_map)
    {
        this.xsl_file = xsl_file;
        this.mapping_file = mapping_file;
        initParams(sourcelang, targetlang, param_map);
    }

    /**
     * Stores the language codes and the custom XSLT parameters.
     * Null language codes are treated as empty and a null map as an empty map, so that
     * the rest of the class never has to null-check them.
     */
    private void initParams(String sourcelang, String targetlang, HashMap param_map)
    {
        this.sourcelang = (sourcelang == null) ? "" : sourcelang;
        this.targetlang = (targetlang == null) ? "" : targetlang;

        // if only target language is provided, assume source language is English
        if (this.sourcelang.equals("") && !this.targetlang.equals("")) {
            this.sourcelang = "en";
        }

        // any custom parameters to be passed into the XSLT would be in the map by now
        this.paramMap = (param_map == null) ? new HashMap() : param_map;
    }

    /**
     * Pipe mode: reads documents from standard input and transforms each one into the
     * output file named on the line following its {@link #DOC_START} marker.
     *
     * @return true if the {@link #INPUT_END} marker was reached; false if a transformation
     *         failed, an error occurred, or the input ended without the end marker
     */
    private boolean process()
    {
        try {
            // Use System InputStream to receive piped data from the perl program
            BufferedReader br = new BufferedReader(new InputStreamReader(System.in, "UTF8"));

            int system_status = BEFORE_READING;
            StringBuffer a_doc = new StringBuffer();
            String output_file = "";
            String this_line;

            // Block until a line arrives or the stream ends. Polling ready() would stop
            // the loop as soon as the writing process is momentarily slower than we are.
            while ((this_line = br.readLine()) != null) {

                if (system_status == BEFORE_READING) {
                    if (this_line.equals(DOC_START)) {
                        // the line after the start marker is the output file name
                        output_file = br.readLine();
                        if (output_file == null) {
                            System.err.println("Receiving piped data error! Input ended before an output file name was given.");
                            return false;
                        }
                        system_status = IS_READING;
                        a_doc = new StringBuffer();
                    }
                    else if (this_line.equals(INPUT_END)) {
                        return true;
                    }
                    else {
                        System.err.println("Undefined process status:" + this_line);
                    }
                }
                else { // IS_READING
                    if (this_line.equals(DOC_END)) {
                        boolean result;
                        if (mapping_file != null && !mapping_file.equals("")) {
                            result = translateXMLWithMapping(a_doc.toString(), output_file);
                        }
                        else {
                            result = translateXML(a_doc.toString(), output_file);
                        }

                        if (!result) {
                            System.err.println("Translation Failed!!");
                            return false;
                        }

                        system_status = BEFORE_READING;
                    }
                    else {
                        a_doc.append(this_line).append("\n");
                    }
                }
            }
        }
        catch (Exception e) {
            System.err.println("Receiving piped data error!" + e.toString());
        }

        // the end marker was never seen
        return false;
    }

    /**
     * Transforms one in-memory document with the stylesheet and writes the result to a file.
     *
     * @param full_doc    the complete XML document text
     * @param output_file the file to write the result to
     * @return true (failures are reported by exception)
     */
    private boolean translateXML(String full_doc, String output_file)
        throws IOException, TransformerException, TransformerConfigurationException, FileNotFoundException
    {
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer = tFactory.newTransformer(new StreamSource(xsl_file));

        setTransformerParams(transformer); // sourcelang and targetlang and any further custom parameters to be passed into the XSLT

        OutputStream out = new FileOutputStream(output_file);
        try {
            transformer.transform(new StreamSource(new StringReader(full_doc)), new StreamResult(out));
        }
        finally {
            out.close(); // release the file handle even if the transformation fails
        }
        return true;
    }

    /**
     * Transforms one in-memory document with the stylesheet, passing the root element of
     * the mapping document as the <code>mapping</code> parameter, then recalculates the
     * record lengths in the result and writes it to a file.
     *
     * @param full_doc    the complete XML document text
     * @param output_file the file to write the result to
     * @return true on success; false if anything went wrong (the stack trace is printed)
     */
    private boolean translateXMLWithMapping(String full_doc, String output_file)
        throws IOException, TransformerException, TransformerConfigurationException, FileNotFoundException
    {
        try {
            TransformerFactory tFactory = TransformerFactory.newInstance();
            Transformer transformer = tFactory.newTransformer(new StreamSource(xsl_file));

            Document mapping_doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(mapping_file);
            Element mapping = mapping_doc.getDocumentElement();

            transformer.setParameter("mapping", mapping);
            setTransformerParams(transformer); // sourcelang and targetlang and any further custom parameters to be passed into the XSLT

            // transform into a DOM first so the record lengths can be patched in
            Document output_doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            transformer.transform(new StreamSource(new StringReader(full_doc)), new DOMResult(output_doc));

            calculateRecordsLength(output_doc);

            // a transformer created without a stylesheet just serialises the DOM
            Transformer serializer = tFactory.newTransformer();
            OutputStream out = new FileOutputStream(output_file);
            try {
                serializer.transform(new DOMSource(output_doc), new StreamResult(out));
            }
            finally {
                out.close();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            return false;
        }

        return true;
    }

    /** Recalculates the leader length of every <code>record</code> element below the root. */
    private void calculateRecordsLength(Document output_doc)
    {
        NodeList records = output_doc.getDocumentElement().getElementsByTagName(RECORD_ELEMENT);

        for (int i = 0; i < records.getLength(); i++) {
            calculateRecordLength((Element) records.item(i));
        }
    }

    /**
     * Sums the text lengths of all <code>controlfield</code> and <code>subfield</code>
     * elements of the record and stores the total, zero-padded to five characters, as the
     * first text node of the record's first <code>leader</code> element.
     */
    private void calculateRecordLength(Element record)
    {
        int total_length = 0;

        NodeList controlfields = record.getElementsByTagName(CONTROLFIELD_ELEMENT);
        for (int i = 0; i < controlfields.getLength(); i++) {
            total_length += getElementTextValue((Element) controlfields.item(i)).length();
        }

        NodeList subfields = record.getElementsByTagName(SUBFIELD_ELEMENT);
        for (int i = 0; i < subfields.getLength(); i++) {
            total_length += getElementTextValue((Element) subfields.item(i)).length();
        }

        // the record length needs to be five characters long: pad with leading zeros
        StringBuffer record_length = new StringBuffer(String.valueOf(total_length));
        while (record_length.length() < RECORD_LENGTH_DIGITS) {
            record_length.insert(0, '0');
        }

        NodeList leaders = record.getElementsByTagName(LEADER_ELEMENT);

        // only one leader element is expected; the first one is updated
        if (leaders.getLength() > 0) {
            Element leader_element = (Element) leaders.item(0);
            removeFirstTextNode(leader_element);
            Node length_text = leader_element.getOwnerDocument().createTextNode(record_length.toString());
            // a null reference child makes insertBefore append, which covers an empty leader
            leader_element.insertBefore(length_text, leader_element.getFirstChild());
        }
    }

    /** Removes the first direct child of the element that is a text node, if there is one. */
    private void removeFirstTextNode(Element element)
    {
        NodeList children_nodelist = element.getChildNodes();
        for (int i = 0; i < children_nodelist.getLength(); i++) {
            Node child_node = children_nodelist.item(i);
            if (child_node.getNodeType() == Node.TEXT_NODE) {
                element.removeChild(child_node);
                return;
            }
        }
    }

    /** Returns the concatenated values of the element's direct text-node children. */
    private String getElementTextValue(Element element)
    {
        StringBuffer text = new StringBuffer();

        NodeList children_nodelist = element.getChildNodes();
        for (int i = 0; i < children_nodelist.getLength(); i++) {
            Node child_node = children_nodelist.item(i);
            if (child_node.getNodeType() == Node.TEXT_NODE) {
                text.append(child_node.getNodeValue());
            }
        }

        return text.toString();
    }

    /**
     * Points the <code>select</code> attribute of every top-level
     * <code>xsl:variable</code> named <code>mapping</code> in the stylesheet document at
     * the mapping file. Not called from within this class.
     */
    private void setMappingVariable(Document style_doc)
    {
        Node child = style_doc.getDocumentElement().getFirstChild();
        while (child != null) {
            String name = child.getNodeName();
            if (name.equals("xsl:variable")) {
                Element variable_element = (Element) child;
                if (variable_element.getAttribute("name").trim().equals("mapping")) {
                    variable_element.setAttribute("select", "document('" + mapping_file + "')/Mapping");
                }
            }
            child = child.getNextSibling();
        }
    }

    /**
     * Passes the language codes (only when a target language is set) and all custom
     * parameters on to the transformer as XSLT parameters.
     */
    private void setTransformerParams(Transformer transformer)
    {
        // compare by value: a reference comparison against "" only works for interned strings
        if (!targetlang.equals("")) {
            transformer.setParameter("sourcelang", sourcelang);
            transformer.setParameter("targetlang", targetlang);
        }

        // handle any custom parameters that are also to be passed into the XSLT
        Iterator i = paramMap.entrySet().iterator();
        while (i.hasNext()) {
            Map.Entry entry = (Map.Entry) i.next();
            String paramName = (String) entry.getKey();
            String paramValue = (String) entry.getValue();

            transformer.setParameter(paramName, paramValue);
        }
    }

    /**
     * File mode: transforms an XML file with a stylesheet.
     *
     * @param xml_file    the XML file to transform
     * @param xsl_file    the stylesheet to apply
     * @param output_file the file to write to as UTF-8, or "" to write to standard output
     */
    private void translate(String xml_file, String xsl_file, String output_file)
        throws IOException, TransformerException, TransformerConfigurationException, FileNotFoundException
    {
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer = tFactory.newTransformer(new StreamSource(xsl_file));

        setTransformerParams(transformer); // sourcelang and targetlang and any further custom parameters to be passed into the XSLT

        boolean to_stdout = output_file.equals("");
        OutputStream out_stream = to_stdout ? (OutputStream) System.out
                                            : (OutputStream) new FileOutputStream(output_file);
        try {
            OutputStreamWriter output = new OutputStreamWriter(out_stream, "UTF-8");
            // java.io.File does not understand a file:/// prefix, so strip it first
            File xml_input = new File(stripFileUrlPrefix(xml_file));
            transformer.transform(new StreamSource(xml_input), new StreamResult(output));
            output.flush(); // make sure buffered characters reach the stream
        }
        finally {
            // standard output belongs to the JVM and must stay open
            if (!to_stdout) {
                out_stream.close();
            }
        }
    }

    /**
     * Thin wrapper around {@link String#replaceAll(String, String)}.
     *
     * @param source_string  the text to work on
     * @param match_regexp   the regular expression to look for
     * @param replace_string the replacement
     * @return the text with every match replaced
     */
    static public String replaceAll(String source_string, String match_regexp, String replace_string)
    {
        return source_string.replaceAll(match_regexp, replace_string);
    }

    // Necessary for paperspast.dm, but can be used generally.
    // The get-chunks cmd of gti.pl perl script when run over paperspast.dm returns XML with source and target lines
    // like: [c=paperspast] {All newspapers} for source and [c=paperspast,l=mi] {Niupepa katoa} for target
    /**
     * Returns just the 'string' portion of a chunk of data, e.g. 'All newspapers' for
     * <code>[c=paperspast] {All newspapers}</code>.
     *
     * @param target_file_text the chunk text
     * @return the text between the first '{' and the last '}', or the input unchanged if it
     *         does not start with '[' or has no such braces
     */
    static public String getChunkString(String target_file_text)
    {
        // only input that starts with [ requires processing
        if (target_file_text.indexOf("[") != 0) {
            return target_file_text;
        }

        int startindex = target_file_text.indexOf("{");
        int endindex = target_file_text.lastIndexOf("}");
        if (startindex != -1 && endindex != -1) {
            return target_file_text.substring(startindex + 1, endindex); // skips { and }
        }
        return target_file_text;
    }

    // Necessary for paperspast.dm, but can be used generally.
    // The get-chunks cmd of gti.pl perl script when run over paperspast.dm returns XML with source and target lines
    // like: [c=paperspast] {All newspapers} for source and [c=paperspast,l=mi] {Niupepa katoa} for target
    /**
     * Returns just the 'attribute' portion of a chunk of data, e.g. 'c=paperspast,l=mi' for
     * <code>[c=paperspast,l=mi] {Niupepa katoa}</code>.
     *
     * @param target_file_text the chunk text
     * @return the trimmed text between the leading '[' and the last ']' that precedes the
     *         first '{', or the input unchanged if it does not have that shape
     */
    static public String getChunkAttr(String target_file_text)
    {
        // only input that starts with [ requires processing
        if (target_file_text.indexOf("[") != 0) {
            return target_file_text;
        }

        int startindex = target_file_text.indexOf("{");
        int endindex = target_file_text.lastIndexOf("}");
        if (startindex != -1 && endindex != -1) {
            endindex = target_file_text.lastIndexOf("]", startindex); // look for ] preceding the {
            if (endindex > 1) {
                // so there's something to substring between [ and ]
                return target_file_text.substring(1, endindex).trim(); // skips [ and ]
            }
        }
        return target_file_text;
    }

    /**
     * Command-line entry point; see the usage text printed by <code>-h</code> for the
     * accepted options. Arguments of the form <code>name=value</code> are collected and
     * passed into the XSLT as parameters.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args)
    {
        String xml_file = "";
        String xsl_file = "";
        String mapping_file = "";
        String output_file = "";

        String sourcelang = "";
        String targetlang = "";

        HashMap paramMap = new HashMap();
        int index = -1; // index of the '=' sign in cmdline argument specifying custom parameters to be passed into the XSLT

        // Checking Arguments
        if (args.length < 1) {
            printUsage();
        }

        for (int i = 0; i < args.length; i++) {
            // an option only counts if it is followed by a value that is not another option
            boolean has_value = i + 1 < args.length && !args[i + 1].startsWith("-");

            if (args[i].equals("-m") && has_value) {
                mapping_file = args[++i];
                checkFile(stripFileUrlPrefix(mapping_file));
            }
            else if (args[i].equals("-x") && has_value) {
                xml_file = args[++i];
                checkFile(stripFileUrlPrefix(xml_file));
            }
            else if (args[i].equals("-t") && has_value) {
                xsl_file = args[++i];
                checkFile(stripFileUrlPrefix(xsl_file));
            }
            else if (args[i].equals("-o") && has_value) {
                output_file = args[++i];
            }
            // The two language parameters (-s and -l) are for the gti-generate-tmx-xml file
            // which requires the target lang (code), and will accept the optional source lang (code)
            else if (args[i].equals("-s") && has_value) {
                sourcelang = args[++i];
            }
            else if (args[i].equals("-l") && has_value) {
                targetlang = args[++i];
            }
            else if (args[i].equals("-h")) {
                printUsage();
            }
            else if ((index = args[i].indexOf("=")) != -1) {
                // custom parameters provided on the cmdline in the form paramName1=paramValue1 paramName2=paramValue2 etc
                // that are to be passed into the XSLT
                String paramName = args[i].substring(0, index);
                String paramValue = args[i].substring(index + 1); // skip the = sign
                paramMap.put(paramName, paramValue);
            }
            else {
                printUsage();
            }
        }

        if (xsl_file.equals("")) {
            // a stylesheet is required in both modes
            printUsage();
        }
        else if (xml_file.equals("")) { // read from pipe line
            ApplyXSLT core;
            if (mapping_file.equals("")) {
                core = new ApplyXSLT(xsl_file, sourcelang, targetlang, paramMap);
            }
            else {
                core = new ApplyXSLT(xsl_file, mapping_file, sourcelang, targetlang, paramMap);
            }
            core.process();
        }
        else {
            ApplyXSLT core = new ApplyXSLT(sourcelang, targetlang, paramMap);
            try {
                core.translate(xml_file, xsl_file, output_file);
            }
            catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Turns a <code>file:///</code> style argument into a path java.io.File understands.
     * On systems whose path separator is '/', the leading slash of the absolute path is
     * kept, so <code>file:///home/x</code> becomes <code>/home/x</code>; elsewhere the
     * prefix is simply dropped, so <code>file:///C:/x</code> becomes <code>C:/x</code>.
     * Names without the prefix are returned unchanged.
     */
    private static String stripFileUrlPrefix(String filename)
    {
        if (!filename.startsWith(FILE_URL_PREFIX)) {
            return filename;
        }
        String rest = filename.substring(FILE_URL_PREFIX.length());
        if (File.separatorChar == '/' && !rest.startsWith("/")) {
            return "/" + rest;
        }
        return rest;
    }

    /** Exits with status -1 and an error message if the named file does not exist. */
    private static void checkFile(String filename)
    {
        File file = new File(filename);
        if (!file.exists()) {
            System.out.println("Error: " + filename + " doesn't exist!");
            System.exit(-1);
        }
    }

    /** Prints the command-line usage and exits with status -1. */
    private static void printUsage()
    {
        System.out.println("Usage: ApplyXSLT -x File -t File [-m File] [-o File] [-s sourcelang] [-l targetlang] [param-name=param-value]");
        System.out.println("\t-x specifies the xml file (Note: optional for piped xml data)");
        System.out.println("\t-t specifies the xsl file");
        System.out.println("\t-m specifies the mapping file (for MARCXMLPlugout.pm only)");
        System.out.println("\t-o specifies the output file name (output to screen if this option is absent)");
        System.out.println("\t-s specifies the input language code for generating TMX file. Defaults to 'en' if none is provided");
        System.out.println("\t-l specifies the output language code. Required if generating a TMX file.");
        System.out.println("\tFor general transformations of an XML by an XSLT, you can pass in parameter name=value pairs if any need to passed on into the XSLT as xsl params.");
        System.exit(-1);
    }
}
515
516
Note: See TracBrowser for help on using the repository browser.