root/other-projects/gti/gti-generate-tmx-xml.xsl @ 25242

Revision 25242, 4.9 KB (checked in by ak19, 7 years ago)

With Sam's help. 1. Updated the gti-generate-excel-xml.xsl file so that it no longer outputs an excess of newlines. It now also turns ampersand characters in XML input files into their character entity values in the excel.xml output file. 2. Added 2 new XSLT files, gti-generate-tmx-xml and gti-tmx-to-txt, which still need to be tested in practice. The first takes GS translation XML files containing chunks of strings to be translated and generates a Translation Memory eXchange (TMX) file from them. TMX is an open XML format that is also accepted by the Google Translator Toolkit, so translators can use this toolkit to do the translations if they wish. The 2nd XSLT file takes the TMX file returned by translators and outputs the UTF-16 txt file that was previously expected when processing spreadsheet translations, so that it fits into the same processing pipeline. These 2 new XSLT files, when applied by ApplyXSLT.jar/java to their input XML, require an additional parameter (the target language) and further take an optional parameter (the source language). ApplyXSLT has been updated to work with this.

Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<!--<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:java="http://xml.apache.org/xslt/java" xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet">-->
3<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:java="http://xml.apache.org/xslt/java">
4<!--On Translation Memory eXchange open-XML: http://www.opentag.com/tmx.htm and http://www.gala-global.org/oscarStandards/tmx/tmx14b.html
5And for character entities: http://www.w3.org/MarkUp/html3/latin1.html-->
6
7  <xsl:output method="xml" encoding="UTF-8"/>
8  <xsl:param name="sourcelang"/>
9  <xsl:param name="targetlang"/>
10
11  <xsl:template match="GTIResponse">
12    <tmx version="1.4" xmlns="http://www.lisa.org/tmx14"><xsl:text>&#10;</xsl:text> <!--newline-->
13    <header creationtool="Greenstone Translator Interface" creationtoolversion="Greenstone2.85"
14        datatype="PlainText" segtype="sentence"
15        adminlang="en-us" srclang="EN"
16        o-tmf="ABCTransMem">
17    </header><xsl:text>&#10;</xsl:text> <!--newline-->
18    <body><xsl:text>&#10;</xsl:text> <!--newline-->
19      <xsl:text>&#10;</xsl:text> <!--newline-->
20      <!--<xsl:apply-templates select="*"/>-->
21      <xsl:apply-templates select="TranslationFile"/>
22      <!-- control the order: first all the strings needing to be updated, then all those that need translating-->
23      <xsl:apply-templates select="ChunksRequiringUpdating"/>
24      <xsl:apply-templates select="ChunksRequiringTranslation"/>
25    </body>
26    </tmx>
27  </xsl:template>
28
29
30  <!-- The input file contains information as follows:
31     <TranslationFile key="coredm" target_file_path="macros/mongolian.dm" num_chunks_translated="353" num_chunks_requiring_translation="32" num_chunks_requiring_updating="18"/>
32     We want this useful information to be present in the output TMX file too. Each attribute of a TranslationFile element will be a separate <prop> property value,
33       where the TranslationFile attribute names become the values of each property's type attribute, and the TranslationFile attribute values are the property contents.-->
34  <xsl:template match="TranslationFile">
35    <xsl:for-each select="@*">
36      <prop type="{name()}"><xsl:value-of select="."/></prop>
37      <xsl:text>&#10;</xsl:text><!--newline-->
38    </xsl:for-each>
39  </xsl:template>
40
41  <xsl:template match="ChunksRequiringTranslation">
42    <xsl:text>&#10;</xsl:text> <!--newline-->
43    <note><xsl:text xml:lang="en">Chunks requiring translating</xsl:text></note><xsl:text>&#10;</xsl:text> <!--newline-->
44    <xsl:apply-templates select="Chunk">
45      <xsl:sort select="@key"/>
46    </xsl:apply-templates>
47  </xsl:template>
48
49  <xsl:template match="ChunksRequiringUpdating">
50    <xsl:text>&#10;</xsl:text> <!--newline-->
51    <note><xsl:text xml:lang="en">Chunks requiring updating</xsl:text></note><xsl:text>&#10;</xsl:text> <!--newline-->
52    <xsl:apply-templates select="Chunk">
53      <xsl:sort select="@key"/>
54    </xsl:apply-templates>
55  </xsl:template>
56
57
58  <xsl:template match="Chunk">
59    <tu><xsl:text>&#10;</xsl:text> <!--newline-->
60      <tuv xml:lang="{$sourcelang}">
61    <prop type="source"><xsl:value-of select="@key"/></prop><xsl:text>&#10;</xsl:text> <!--newline-->
62    <seg>
63      <xsl:for-each select="SourceFileText">
64        <xsl:variable name="tempText1" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll(., "\n", "&amp;#10;&#10;")'/>
65        <xsl:variable name="tempText2" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText1, "&#60;", "&amp;lt;")'/>
66        <xsl:variable name="tempText3" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText2, "&#62;", "&amp;gt;")'/>
67        <xsl:variable name="escapedText" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText3, "&#38;", "&amp;amp;")'/> <!-- ampersands-->
68        <xsl:value-of select='$escapedText' disable-output-escaping="yes"/>
69          </xsl:for-each>
70    </seg><xsl:text>&#10;</xsl:text> <!--newline-->
71      </tuv><xsl:text>&#10;</xsl:text> <!--newline-->
72      <tuv xml:lang="{$targetlang}">
73    <prop type="target"><xsl:value-of select="@key"/></prop><xsl:text>&#10;</xsl:text> <!--newline-->
74    <seg>
75      <xsl:for-each select="TargetFileText">
76        <xsl:variable name="tempText1" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll(., "\n", "&amp;#10;&#10;")'/>
77        <xsl:variable name="tempText2" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText1, "&#60;", "&amp;lt;")'/>
78        <xsl:variable name="tempText3" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText2, "&#62;", "&amp;gt;")'/>
79        <xsl:variable name="escapedText" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText3, "&#38;", "&amp;amp;")'/> <!-- ampersands-->
80        <xsl:value-of select='$escapedText' disable-output-escaping="yes"/>
81          </xsl:for-each>
82    </seg><xsl:text>&#10;</xsl:text> <!--newline-->
83      </tuv><xsl:text>&#10;</xsl:text> <!--newline-->
84    </tu><xsl:text>&#10;&#10;</xsl:text> <!--newline-->
85  </xsl:template>
86
87  <!--<xsl:template match="*">Do nothing for all other templates matched</xsl:template>-->
88
89</xsl:stylesheet>
Note: See TracBrowser for help on using the browser.