source: main/trunk/greenstone2/build-src/src/java/org/nzdl/gsdl/ApplyXSLT.java@ 28709

Last change on this file since 28709 was 28709, checked in by kjdon, 10 years ago

allow mapping file when processing an xml file

  • Property svn:keywords set to Author Date Id Revision
File size: 18.5 KB
Line 
1/**********************************************************************
2 *
3 * ApplyXSLT.java
4 *
5 * Copyright 2006-2010 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27
28package org.nzdl.gsdl;
29
30import java.io.*;
31import java.util.HashMap;
32import java.util.Iterator;
33import java.util.Map;
34import java.util.Map.Entry;
35
36import javax.xml.transform.Transformer;
37import javax.xml.transform.TransformerConfigurationException;
38import javax.xml.transform.TransformerException;
39import javax.xml.transform.TransformerFactory;
40import javax.xml.transform.stream.StreamResult;
41import javax.xml.transform.stream.StreamSource;
42
43import javax.xml.parsers.*;
44import javax.xml.transform.dom.*;
45import org.w3c.dom.*;
46
47
48
/**
 * Command-line tool that applies an XSLT stylesheet to XML through the
 * javax.xml.transform (TrAX) API.
 *
 * Two modes are supported (see {@link #main(String[])}):
 * <ul>
 * <li>File mode (-x and -t given): the XML file is transformed and the result
 *     is written to the -o file, or to standard output when -o is absent.</li>
 * <li>Pipe mode (only -t given): documents are read from standard input. Each
 *     document is announced by a {@link #DOC_START} line, followed by a line
 *     holding the output file name, then the document's lines, and closed by a
 *     {@link #DOC_END} line. A {@link #INPUT_END} line ends the session.</li>
 * </ul>
 * In both modes an optional mapping file (-m) is parsed and handed to the
 * stylesheet as the "mapping" parameter. In pipe mode with a mapping file, the
 * leading text of each record's "leader" element is additionally replaced by a
 * five-character, zero-padded length computed from the record's field text.
 */
public class ApplyXSLT
{
    /** Marker line that opens a piped document. */
    public static final String DOC_START = "<?DocStart?>";
    /** Marker line that closes a piped document. */
    public static final String DOC_END = "<?DocEnd?>";
    /** Marker line that ends the piped session. */
    public static final String INPUT_END = "<?Done?>";

    private static final String RECORD_ELEMENT = "record";
    private static final String CONTROLFIELD_ELEMENT = "controlfield";
    private static final String SUBFIELD_ELEMENT = "subfield";
    private static final String LEADER_ELEMENT = "leader";

    /** Width the record length is zero-padded to. */
    private static final int RECORD_LENGTH_WIDTH = 5;

    // States of the pipe reader in process()
    private static final int BEFORE_READING = 0;
    private static final int IS_READING = 1;

    private String xsl_file;
    private String mapping_file;

    private String sourcelang;
    private String targetlang;
    private HashMap paramMap;

    /**
     * Creates an instance for file mode, where the stylesheet is supplied per call.
     *
     * @param sourcelang source language code, or "" if not specified
     * @param targetlang target language code, or "" if not specified
     * @param param_map  custom name/value parameters to pass into the XSLT
     */
    public ApplyXSLT(String sourcelang, String targetlang, HashMap param_map)
    {
        initParams(sourcelang, targetlang, param_map);
    }

    /**
     * Creates an instance for pipe mode without a mapping file.
     *
     * @param xsl_file   location of the stylesheet
     * @param sourcelang source language code, or "" if not specified
     * @param targetlang target language code, or "" if not specified
     * @param param_map  custom name/value parameters to pass into the XSLT
     */
    public ApplyXSLT(String xsl_file, String sourcelang, String targetlang, HashMap param_map)
    {
        this.xsl_file = xsl_file;
        initParams(sourcelang, targetlang, param_map);
    }

    /**
     * Creates an instance for pipe mode with a mapping file.
     *
     * @param xsl_file     location of the stylesheet
     * @param mapping_file location of the mapping document
     * @param sourcelang   source language code, or "" if not specified
     * @param targetlang   target language code, or "" if not specified
     * @param param_map    custom name/value parameters to pass into the XSLT
     */
    public ApplyXSLT(String xsl_file, String mapping_file, String sourcelang, String targetlang, HashMap param_map)
    {
        this.xsl_file = xsl_file;
        this.mapping_file = mapping_file;
        initParams(sourcelang, targetlang, param_map);
    }

    /**
     * Stores the language codes and custom parameters. A null language code is
     * treated as "not specified", and a null map as "no custom parameters".
     */
    private void initParams(String sourcelang, String targetlang, HashMap param_map)
    {
        this.sourcelang = (sourcelang == null) ? "" : sourcelang;
        this.targetlang = (targetlang == null) ? "" : targetlang;
        // if only target language is provided, assume source language is English
        if (this.sourcelang.equals("") && !this.targetlang.equals("")) {
            this.sourcelang = "en";
        }

        // any custom parameters to be passed into the XSLT would be in the map by now
        paramMap = (param_map == null) ? new HashMap() : param_map;
    }

    /**
     * Reads documents from standard input and transforms each one as soon as
     * its closing marker is seen.
     *
     * @return true only if the INPUT_END marker was reached; false if a
     *         transformation failed, an error occurred, or the input ended
     *         without that marker
     */
    private boolean process()
    {
        try {
            // Use System InputStream to receive piped data from the perl program
            BufferedReader br = new BufferedReader(new InputStreamReader(System.in, "UTF8"));

            int system_status = BEFORE_READING;
            StringBuffer a_doc = new StringBuffer();
            String output_file = "";

            // readLine() blocks until a line or end-of-stream arrives. Polling
            // ready() instead would end the loop whenever the pipe is merely
            // empty for a moment, dropping documents the producer has not
            // written yet.
            String this_line;
            while ((this_line = br.readLine()) != null) {
                if (system_status == BEFORE_READING) {
                    if (this_line.equals(DOC_START)) {
                        // the line after the start marker is the output file name
                        output_file = br.readLine();
                        if (output_file == null) {
                            System.err.println("Input ended before an output file name was given.");
                            return false;
                        }
                        system_status = IS_READING;
                        a_doc = new StringBuffer();
                    }
                    else if (this_line.equals(INPUT_END)) {
                        return true;
                    }
                    else {
                        System.err.println("Undefined process status:" + this_line);
                    }
                }
                else { // IS_READING
                    if (this_line.equals(DOC_END)) {
                        boolean result;
                        if (mapping_file != null && !mapping_file.equals("")) {
                            result = translateXMLWithMapping(a_doc.toString(), output_file);
                        }
                        else {
                            result = translateXML(a_doc.toString(), output_file);
                        }

                        if (!result) {
                            System.err.println("Translation Failed!!");
                            return false;
                        }

                        system_status = BEFORE_READING;
                    }
                    else {
                        a_doc.append(this_line).append("\n");
                    }
                }
            }
        }
        catch (Exception e) {
            System.err.println("Receiving piped data error!" + e.toString());
        }

        return false;
    }

    /**
     * Transforms one in-memory document with the instance's stylesheet and
     * writes the result to output_file.
     *
     * @return true (failures are reported by exception)
     */
    private boolean translateXML(String full_doc, String output_file)
        throws IOException, TransformerException, TransformerConfigurationException, FileNotFoundException
    {
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer = tFactory.newTransformer(new StreamSource(xsl_file));

        setTransformerParams(transformer); // sourcelang and targetlang and any further custom parameters to be passed into the XSLT

        OutputStream out = new FileOutputStream(output_file);
        try {
            transformer.transform(new StreamSource(new StringReader(full_doc)), new StreamResult(out));
        }
        finally {
            // one stream is opened per piped document, so it must not be left open
            out.close();
        }
        return true;
    }

    /**
     * Transforms one in-memory document with the mapping document passed in as
     * the "mapping" stylesheet parameter, rewrites the record lengths in the
     * result and serialises it to output_file.
     *
     * @return true on success; false if any step failed (the stack trace is
     *         printed)
     */
    private boolean translateXMLWithMapping(String full_doc, String output_file)
        throws IOException, TransformerException, TransformerConfigurationException, FileNotFoundException
    {
        OutputStream out = null;
        try {
            TransformerFactory tFactory = TransformerFactory.newInstance();
            Transformer transformer = tFactory.newTransformer(new StreamSource(xsl_file));

            Document mapping_doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(mapping_file);
            Element mapping = mapping_doc.getDocumentElement();

            transformer.setParameter("mapping", mapping);
            setTransformerParams(transformer); // sourcelang and targetlang and any further custom parameters to be passed into the XSLT

            // transform into a DOM first so the leaders can be patched before writing
            Document output_doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            transformer.transform(new StreamSource(new StringReader(full_doc)), new DOMResult(output_doc));

            calculateRecordsLength(output_doc);

            // a transformer created without a stylesheet just copies the DOM out
            transformer = tFactory.newTransformer();
            out = new FileOutputStream(output_file);
            transformer.transform(new DOMSource(output_doc), new StreamResult(out));
        }
        catch (Exception e) {
            e.printStackTrace();
            return false;
        }
        finally {
            if (out != null) {
                out.close();
            }
        }

        return true;
    }

    /** Updates the leader of every "record" element below the document element. */
    private void calculateRecordsLength(Document output_doc)
    {
        NodeList records = output_doc.getDocumentElement().getElementsByTagName(RECORD_ELEMENT);

        for (int i = 0; i < records.getLength(); i++) {
            calculateRecordLength((Element) records.item(i));
        }
    }

    /**
     * Sums the text lengths of the record's "controlfield" and "subfield"
     * elements and stores the zero-padded total as the leading text of the
     * record's first "leader" element, replacing that element's first text node.
     */
    private void calculateRecordLength(Element record)
    {
        int total_length = 0;

        NodeList controlfields = record.getElementsByTagName(CONTROLFIELD_ELEMENT);
        for (int i = 0; i < controlfields.getLength(); i++) {
            total_length += getElementTextValue((Element) controlfields.item(i)).length();
        }

        NodeList subfields = record.getElementsByTagName(SUBFIELD_ELEMENT);
        for (int i = 0; i < subfields.getLength(); i++) {
            total_length += getElementTextValue((Element) subfields.item(i)).length();
        }

        // the record length needs to be five characters long: pad with leading zeros
        String record_length = String.valueOf(total_length);
        while (record_length.length() < RECORD_LENGTH_WIDTH) {
            record_length = "0" + record_length;
        }

        NodeList leaders = record.getElementsByTagName(LEADER_ELEMENT);

        // only one leader element
        if (leaders.getLength() > 0) {
            Element leader_element = (Element) leaders.item(0);
            removeFirstTextNode(leader_element);
            leader_element.insertBefore(leader_element.getOwnerDocument().createTextNode(record_length),
                                        leader_element.getFirstChild());
        }
    }

    /** Removes the first child of element that is a text node, if there is one. */
    private void removeFirstTextNode(Element element)
    {
        NodeList children_nodelist = element.getChildNodes();
        for (int i = 0; i < children_nodelist.getLength(); i++) {
            Node child_node = children_nodelist.item(i);
            if (child_node.getNodeType() == Node.TEXT_NODE) {
                element.removeChild(child_node);
                return;
            }
        }
    }

    /** Returns the concatenated values of the text nodes directly under element. */
    private String getElementTextValue(Element element)
    {
        StringBuffer text = new StringBuffer();

        NodeList children_nodelist = element.getChildNodes();
        for (int i = 0; i < children_nodelist.getLength(); i++) {
            Node child_node = children_nodelist.item(i);
            if (child_node.getNodeType() == Node.TEXT_NODE) {
                text.append(child_node.getNodeValue());
            }
        }

        return text.toString();
    }

    /**
     * Points the select attribute of every top-level xsl:variable named
     * "mapping" in the given stylesheet document at the mapping file.
     * Not called from within this class.
     */
    private void setMappingVariable(Document style_doc)
    {
        Node child = style_doc.getDocumentElement().getFirstChild();
        while (child != null) {
            if (child.getNodeName().equals("xsl:variable")) {
                Element variable_element = (Element) child;
                if (variable_element.getAttribute("name").trim().equals("mapping")) {
                    variable_element.setAttribute("select", "document('" + mapping_file + "')/Mapping");
                }
            }
            child = child.getNextSibling();
        }
    }

    /**
     * Passes the language codes (only when a target language was given) and
     * all custom parameters on to the transformer.
     */
    private void setTransformerParams(Transformer transformer)
    {
        // compare by value: a reference comparison against "" is true for any
        // empty string that is not the interned literal
        if (targetlang != null && !targetlang.equals("")) {
            transformer.setParameter("sourcelang", sourcelang);
            transformer.setParameter("targetlang", targetlang);
        }

        if (paramMap == null) {
            return;
        }

        // handle any custom parameters that are also to be passed into the XSLT
        Iterator i = paramMap.entrySet().iterator();
        while (i.hasNext()) {
            Map.Entry entry = (Map.Entry) i.next();
            String paramName = (String) entry.getKey();
            String paramValue = (String) entry.getValue();

            transformer.setParameter(paramName, paramValue);
        }
    }

    /**
     * Runs the transformer over xml_file and writes the UTF-8 result to
     * output_file, or to standard output when output_file is "". A file is
     * closed afterwards; standard output is only flushed.
     */
    private void transformFile(Transformer transformer, String xml_file, String output_file)
        throws IOException, TransformerException
    {
        boolean to_stdout = output_file.equals("");
        OutputStreamWriter output;
        if (to_stdout) {
            output = new OutputStreamWriter(System.out, "UTF-8");
        }
        else {
            output = new OutputStreamWriter(new FileOutputStream(output_file), "UTF-8");
        }

        try {
            transformer.transform(new StreamSource(new File(xml_file)), new StreamResult(output));
        }
        finally {
            if (to_stdout) {
                output.flush();
            }
            else {
                output.close();
            }
        }
    }

    /**
     * Transforms xml_file with xsl_file and writes the result to output_file
     * (standard output when output_file is "").
     */
    private void translate(String xml_file, String xsl_file, String output_file)
        throws IOException, TransformerException, TransformerConfigurationException, FileNotFoundException
    {
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer = tFactory.newTransformer(new StreamSource(xsl_file));

        setTransformerParams(transformer); // sourcelang and targetlang and any further custom parameters to be passed into the XSLT
        transformFile(transformer, xml_file, output_file);
    }

    /**
     * Like translate(), but first parses mapping_file and passes its document
     * element to the stylesheet as the "mapping" parameter. If the mapping file
     * cannot be loaded the problem is reported and the transformation still
     * runs, without that parameter.
     */
    private void translateWithMapping(String xml_file, String xsl_file, String mapping_file, String output_file)
        throws IOException, TransformerException, TransformerConfigurationException, FileNotFoundException
    {
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer = tFactory.newTransformer(new StreamSource(xsl_file));

        try {
            Document mapping_doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(mapping_file);
            Element mapping = mapping_doc.getDocumentElement();

            transformer.setParameter("mapping", mapping);
        }
        catch (Exception e) {
            System.err.println("Couldn't load in mapping file");
            e.printStackTrace();
        }

        setTransformerParams(transformer); // sourcelang and targetlang and any further custom parameters to be passed into the XSLT
        transformFile(transformer, xml_file, output_file);
    }

    /**
     * Static wrapper around {@link String#replaceAll(String, String)}.
     *
     * @param source_string  text to search
     * @param match_regexp   regular expression to match
     * @param replace_string replacement (regex replacement syntax applies)
     * @return source_string with every match replaced
     */
    static public String replaceAll(String source_string, String match_regexp, String replace_string)
    {
        return source_string.replaceAll(match_regexp, replace_string);
    }

    // Necessary for paperspast.dm, but can be used generally.
    // The get-chunks cmd of gti.pl perl script when run over paperspast.dm returns XML with source and target lines
    // like: [c=paperspast] {All newspapers} for source and [c=paperspast,l=mi] {Niupepa katoa} for target
    /**
     * Returns just the 'string' portion of a chunk of data, e.g.
     * 'All newspapers' and 'Niupepa katoa' for the examples above.
     *
     * @param target_file_text the chunk text
     * @return the text between the first '{' and the last '}', or the input
     *         unchanged when it does not start with '[' or holds no '{' that
     *         is followed by a '}'
     */
    static public String getChunkString(String target_file_text)
    {
        // only input that starts with '[' requires processing
        if (target_file_text.indexOf("[") != 0) {
            return target_file_text;
        }

        int startindex = target_file_text.indexOf("{");
        int endindex = target_file_text.lastIndexOf("}");
        // endindex > startindex also covers a missing '}' (-1) and keeps
        // substring() from throwing when the last '}' precedes the first '{'
        if (startindex != -1 && endindex > startindex) {
            return target_file_text.substring(startindex + 1, endindex); // skips { and }
        }
        return target_file_text;
    }

    // Necessary for paperspast.dm, but can be used generally.
    // The get-chunks cmd of gti.pl perl script when run over paperspast.dm returns XML with source and target lines
    // like: [c=paperspast] {All newspapers} for source and [c=paperspast,l=mi] {Niupepa katoa} for target
    /**
     * Returns just the 'attribute' portion of a chunk of data, e.g.
     * 'c=paperspast' and 'c=paperspast,l=mi' for the examples above.
     *
     * @param target_file_text the chunk text
     * @return the trimmed text between the leading '[' and the last ']' that
     *         precedes the first '{', or the input unchanged when it does not
     *         start with '[', lacks '{' or '}', or has nothing between the
     *         brackets
     */
    static public String getChunkAttr(String target_file_text)
    {
        // only input that starts with '[' requires processing
        if (target_file_text.indexOf("[") != 0) {
            return target_file_text;
        }

        int startindex = target_file_text.indexOf("{");
        int endindex = target_file_text.lastIndexOf("}");
        if (startindex != -1 && endindex != -1) {
            endindex = target_file_text.lastIndexOf("]", startindex); // look for ] preceding the {
            if (endindex > 1) {
                // so there's something to substring between [ and ]
                return target_file_text.substring(1, endindex).trim(); // skips [ and ]
            }
        }
        return target_file_text;
    }

    /**
     * Command-line entry point; see printUsage() for the options. Exits via
     * printUsage() on an unrecognised argument or when no stylesheet is given.
     */
    public static void main(String[] args)
    {
        String xml_file = "";
        String xsl_file = "";
        String mapping_file = "";
        String output_file = "";

        String sourcelang = "";
        String targetlang = "";

        HashMap paramMap = new HashMap();

        // Checking Arguments
        if (args.length < 1) {
            printUsage();
        }

        for (int i = 0; i < args.length; i++) {
            String arg = args[i];
            // an option only takes a value if the next argument is not itself an option
            boolean has_value = i + 1 < args.length && !args[i + 1].startsWith("-");
            int index; // index of the '=' sign in cmdline argument specifying custom parameters to be passed into the XSLT

            if (arg.equals("-m") && has_value) {
                mapping_file = args[++i];
                checkFile(mapping_file.replaceAll("file:///", ""));
            }
            else if (arg.equals("-x") && has_value) {
                xml_file = args[++i];
                checkFile(xml_file.replaceAll("file:///", ""));
            }
            else if (arg.equals("-t") && has_value) {
                xsl_file = args[++i];
                checkFile(xsl_file.replaceAll("file:///", ""));
            }
            else if (arg.equals("-o") && has_value) {
                output_file = args[++i];
            }
            // The two language parameters (-s and -l) are for the gti-generate-tmx-xml file
            // which requires the target lang (code), and will accept the optional source lang (code)
            else if (arg.equals("-s") && has_value) {
                sourcelang = args[++i];
            }
            else if (arg.equals("-l") && has_value) {
                targetlang = args[++i];
            }
            else if (arg.equals("-h")) {
                printUsage();
            }
            else if ((index = arg.indexOf("=")) != -1) {
                // custom parameters provided on the cmdline in the form paramName1=paramValue1 paramName2=paramValue2 etc
                // that are to be passed into the XSLT
                String paramName = arg.substring(0, index);
                String paramValue = arg.substring(index + 1); // skip the = sign
                paramMap.put(paramName, paramValue);
            }
            else {
                printUsage();
            }
        }

        if (xsl_file.equals("")) {
            // a stylesheet is required in both modes
            printUsage();
        }
        else if (xml_file.equals("")) {
            // no xml file: read the documents from the pipe line
            ApplyXSLT core;
            if (mapping_file.equals("")) {
                core = new ApplyXSLT(xsl_file, sourcelang, targetlang, paramMap);
            }
            else {
                core = new ApplyXSLT(xsl_file, mapping_file, sourcelang, targetlang, paramMap);
            }
            core.process();
        }
        else {
            ApplyXSLT core = new ApplyXSLT(sourcelang, targetlang, paramMap);
            try {
                if (mapping_file.equals("")) {
                    core.translate(xml_file, xsl_file, output_file);
                }
                else {
                    core.translateWithMapping(xml_file, xsl_file, mapping_file, output_file);
                }
            }
            catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /** Exits with status -1 after reporting the problem if filename does not exist. */
    private static void checkFile(String filename)
    {
        File file = new File(filename);
        if (!file.exists()) {
            System.out.println("Error: " + filename + " doesn't exist!");
            System.exit(-1);
        }
    }

    /** Prints the command-line usage and exits with status -1. */
    private static void printUsage()
    {
        System.out.println("Usage: ApplyXSLT -x File -t File [-m File] [-o File] [-s sourcelang] [-l targetlang] [param-name=param-value]");
        System.out.println("\t-x specifies the xml file (Note: optional for piped xml data)");
        System.out.println("\t-t specifies the xsl file");
        System.out.println("\t-m specifies the mapping file (for MARCXMLPlugout.pm only)");
        System.out.println("\t-o specifies the output file name (output to screen if this option is absent)");
        System.out.println("\t-s specifies the input language code for generating TMX file. Defaults to 'en' if none is provided");
        System.out.println("\t-l specifies the output language code. Required if generating a TMX file.");
        System.out.println("\tFor general transformations of an XML by an XSLT, you can pass in parameter name=value pairs if any need to passed on into the XSLT as xsl params.");
        System.exit(-1);
    }
}
544
545
Note: See TracBrowser for help on using the repository browser.