root/other-projects/gti/gti-tmx-to-txt.xsl @ 25242

Revision 25242, 2.5 KB (checked in by ak19, 8 years ago)

With Sam's help. 1. Updated the gti-generate-excel-xml.xsl file so that it no longer outputs an excess of newlines. It now also turns ampersand characters in the XML input files into their character entity values in the excel.xml output file. 2. Added 2 new XSLT files, gti-generate-tmx-xml and gti-tmx-to-txt, which still need to be tested in practice. The first takes GS translation XML files containing chunks of strings to be translated and generates a Translation Memory eXchange (TMX) file from them. TMX is an open XML format that is also accepted by the Google Translation toolkit, so translators can use this toolkit to do the translations if they wish. The 2nd XSLT file takes the TMX file returned by translators and outputs the UTF-16 txt file that was previously expected when processing spreadsheet translations, so that it fits into the same processing pipeline. These 2 new XSLT files, when applied by ApplyXSLT.jar/java to their input XML, require an additional parameter (the target language) and also take an optional parameter (the source language). ApplyXSLT has been updated to work with this.

Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:java="http://xml.apache.org/xslt/java" xmlns:tmx="http://www.lisa.org/tmx14">
3  <!-- The necessity for using xml namespaces all over and matching on namespaced element names http://www.stylusstudio.com/xsllist/200302/post70120.html -->
4
5  <xsl:output method="text" encoding="UTF-16"/> <!-- When we save as txt from Excel, we choose UTF-16 too -->
6
<xsl:template match="tmx:tmx">
  <!-- Entry point for the TMX root element: processing is handed to the
       tmx:body child only, so this template applies no templates to any
       other child of tmx:tmx. -->
  <xsl:apply-templates select="child::tmx:body"/>
</xsl:template>
10
<xsl:template match="tmx:body">
  <!-- Header row: the literal text "Key", a tab, the literal text "Text",
       then two newlines. Emitted once, before any translation unit. -->
  <xsl:text>Key&#09;Text&#10;&#10;</xsl:text>
  <!-- Each tmx:tu child is then rendered by the template matching tmx:tu. -->
  <xsl:apply-templates select="child::tmx:tu"/>
</xsl:template>
16
<xsl:template match="tmx:tu">
  <!-- Renders one translation unit as text lines.

       First every tmx:tuv child that has a tmx:prop of type 'source' is
       written as   source::[prop value] TAB [segment text] NEWLINE
       then every tmx:tuv child that has a tmx:prop of type 'target' is
       written as   target::[prop value] TAB [segment text] NEWLINE NEWLINE

       The filtering that used to be an xsl:if nested in the xsl:for-each is
       now a predicate on the select; the set of processed nodes and their
       order are unchanged. -->
  <xsl:for-each select="tmx:tuv[tmx:prop[@type='source']]">
    <xsl:call-template name="tmx-tuv-to-line">
      <xsl:with-param name="kind" select="'source'"/>
      <xsl:with-param name="terminator"><xsl:text>&#10;</xsl:text></xsl:with-param>
    </xsl:call-template>
  </xsl:for-each>

  <xsl:for-each select="tmx:tuv[tmx:prop[@type='target']]">
    <xsl:call-template name="tmx-tuv-to-line">
      <xsl:with-param name="kind" select="'target'"/>
      <xsl:with-param name="terminator"><xsl:text>&#10;&#10;</xsl:text></xsl:with-param>
    </xsl:call-template>
  </xsl:for-each>
</xsl:template>

<xsl:template name="tmx-tuv-to-line">
  <!-- Private helper for the tmx:tu template: writes one output line for
       the current tmx:tuv (the context node of the caller).

       Parameters:
         kind        'source' or 'target'. Used both as the line label and
                     to pick the tmx:prop whose value is written after it.
         terminator  text appended after the segment (the caller passes one
                     newline for source lines, two for target lines). -->
  <xsl:param name="kind"/>
  <xsl:param name="terminator"/>

  <xsl:value-of select="$kind"/>
  <xsl:text>::</xsl:text>
  <!-- Select the prop by its type. A bare "tmx:prop" would take the first
       prop child of any type, which is the wrong value whenever a tuv
       carries more than one prop. -->
  <xsl:value-of select="tmx:prop[@type=$kind]"/>
  <xsl:text>&#09;</xsl:text><!-- tab -->

  <!-- Undo the entity escaping found in the segment text. The ampersand is
       replaced last so that the text produced by earlier steps is not
       unescaped a second time.
       NOTE(review): the XPath literal "\n" reaches replaceAll as a backslash
       followed by the letter n (XPath has no escape sequences); what ends up
       in the output depends on ApplyXSLT.replaceAll. TODO confirm. -->
  <xsl:variable name="rawSeg" select="string(tmx:seg)"/>
  <xsl:variable name="noNewlineEntity" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($rawSeg, "&amp;#10;&#10;", "\n")'/>
  <xsl:variable name="noLtEntity" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($noNewlineEntity, "&amp;lt;", "&#60;")'/>
  <xsl:variable name="noGtEntity" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($noLtEntity, "&amp;gt;", "&#62;")'/>
  <xsl:variable name="escapedText" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($noGtEntity, "&amp;amp;", "&#38;")'/>

  <!-- disable-output-escaping is kept from the original; XSLT 1.0 states
       that the text output method ignores it. -->
  <xsl:value-of select="$escapedText" disable-output-escaping="yes"/>
  <xsl:value-of select="$terminator"/>
</xsl:template>
48
49</xsl:stylesheet>
Note: See TracBrowser for help on using the browser.