root/other-projects/gti/gti-generate-tmx-xml.xsl

Revision 25287, 8.0 KB (checked in by ak19, 8 years ago)

1. Added two new XSLT files: both generate the spreadsheet .txt files necessary from the chunks of strings that still require translation work. 2. Added extensive comments into each file on how to use the 4 recently added XSLT files.

Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<!--<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:java="http://xml.apache.org/xslt/java" xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet">-->
3<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:java="http://xml.apache.org/xslt/java">
4<!--On Translation Memory eXchange open-XML: http://www.opentag.com/tmx.htm and http://www.gala-global.org/oscarStandards/tmx/tmx14b.html
5And for character entities: http://www.w3.org/MarkUp/html3/latin1.html
6More information: http://xml.coverpages.org/tmxSpec971212.html#O-TMF-->
7
8   <!-- HOW AND WHEN TO USE THIS FILE
9    A Translation Memory eXchange (TMX) file is an openXML file that contains translated strings that can serve as
10    a reference library for future translation work.
11
    When users want to take all the up to date translated strings in Greenstone and use them in the Google Translator Toolkit
    (GTT) as the basis to continue their translation work on the remaining strings (yet to be translated), this XSLT file
    can be used to generate a TMX from the already-translated strings. The user can then upload this into the GTT.
    Further translation work that the translator does in the GTT will be added to its growing internal TMX file, which
    appears to be inaccessible.
17
18    The process of generating a TMX file of Greenstone's up to date strings for a language is as follows:
19
20       1. Generate an xml file containing all the up to date chunks (those strings that don't require translation work).
21    GS2/bin/java>perl -S gti.pl get-uptodate-chunks mi coredm > ../../macros/maori/uptodatechunks_core.xml
22
23       2. Generate a TMX file from that XML file as follows:
24       GS2/bin/java>java -cp /research/ak19/gs2-svn/bin/java/ApplyXSLT.jar org.nzdl.gsdl.ApplyXSLT -x ../../macros/maori/uptodatechunks_core.xml -t ../script/gti-generate-tmx-xml.xsl -l mi > ../../maori/uptodatecore-mi.tmx
25
26       GS2/bin/java>less ../../maori/uptodatecore-mi.tmx
27
28       3. Send this to the user and they can upload it into GTT.
29
30   
31       (The DOCTYPE has only been commented out below in order to let a further XSLT process the output of this file into a spreadsheet .txt file.
32        Since a third XSLT file exists which operates on another XML file to obtain the spreadsheet .txt file, the DOCTYPE can be reinstated if required.)
33    -->
34
35
<!-- Serialize the result tree as UTF-8 encoded XML. -->
<xsl:output method="xml" encoding="UTF-8"/>

<!-- Language code of the source strings; used for the header's srclang attribute and for the
     xml:lang of every source tuv. Defaults to "en" (the same value the header hard-codes for
     adminlang) so that an unset parameter no longer produces empty srclang/xml:lang attributes.
     A value passed in by the caller still overrides this default. -->
<xsl:param name="sourcelang" select="'en'"/>

<!-- Language code of the translated strings; used for the xml:lang of every target tuv.
     No default is provided: the caller must supply it. -->
<xsl:param name="targetlang"/>
39
<!-- Entry point: wraps a GTIResponse document in a TMX 1.4 skeleton.
     The header carries fixed Greenstone metadata, the srclang taken from the $sourcelang
     stylesheet parameter, and one prop per attribute of each TranslationFile child.
     The body then receives the chunk groups in a fixed order. -->
<xsl:template match="GTIResponse">
  <!-- if applying this XSLT file results in an error mentioning ".../tmx14.dtd (No such file or directory)"
       then comment out the DOCTYPE declaration below -->
  <!--<xsl:text disable-output-escaping="yes">&#10;&lt;!DOCTYPE tmx PUBLIC &quot;-//LISA OSCAR:1998//DTD for Translation Memory eXchange//EN&quot; &quot;tmx14.dtd&quot; &gt;&#10;</xsl:text>-->

  <!-- NOTE(review): this xmlns declaration is only in scope for the literal result elements
       written inside this template. Elements produced by the other templates (prop, note, tu,
       tuv, seg) are therefore created in no namespace. Left unchanged because consumers of the
       output may rely on it; confirm before changing. -->
  <tmx version="1.4" xmlns="http://www.lisa.org/tmx14"><xsl:text>&#10;</xsl:text>
    <header
       creationtool="Greenstone Translator Interface"
       creationtoolversion="Greenstone2.85"
       datatype="PlainText"
       segtype="sentence"
       adminlang="en"
       srclang="{$sourcelang}"
       o-tmf="GreenstoneTranslationFile">
      <xsl:text>&#10;</xsl:text>
      <!-- TMX 1.4 permits prop elements in header, tu and tuv, but not directly inside body
           (body may only contain tu elements), so the TranslationFile properties go here. -->
      <xsl:apply-templates select="TranslationFile"/>
    </header><xsl:text>&#10;</xsl:text>
    <body>
      <xsl:text>&#10;&#10;</xsl:text> <!-- 2 newlines -->

      <xsl:apply-templates select="Chunks"/>

      <xsl:apply-templates select="UptodateChunks"/>

      <!-- control the order: first all the strings needing to be updated, then all those that need translating -->
      <xsl:apply-templates select="ChunksRequiringUpdating"/>
      <xsl:apply-templates select="ChunksRequiringTranslation"/>

      <!-- NOTE(review): the chunk group templates each write a note element before their tu
           elements, so those notes still end up directly inside body, which the TMX 1.4
           content model does not allow. Not changed here. -->
    </body>
  </tmx>
</xsl:template>
70
71
72  <!-- The input file contains information as follows:
73     <TranslationFile key="coredm" target_file_path="macros/mongolian.dm" num_chunks_translated="353" num_chunks_requiring_translation="32" num_chunks_requiring_updating="18"/>
74     We want this useful information to be present in the output TMX file too. Each attribute of a TranslationFile element will be a separate <prop> property value,
75       where the TranslationFile attribute names become the values of each property's type attribute, and the TranslationFile attribute values are the property contents.-->
<!-- Writes one prop element per attribute of the matched TranslationFile element:
     the attribute's name becomes the value of prop's type attribute, the attribute's
     value becomes the prop's text content. Each prop is followed by a newline. -->
<xsl:template match="TranslationFile">
  <xsl:for-each select="@*">
    <xsl:element name="prop">
      <xsl:attribute name="type">
        <xsl:value-of select="name()"/>
      </xsl:attribute>
      <xsl:value-of select="."/>
    </xsl:element>
    <xsl:text>&#10;</xsl:text>
  </xsl:for-each>
</xsl:template>
82
<!-- Writes a heading note for the group of up to date chunks, then processes every
     Chunk child of the matched element, sorted on its key attribute. -->
<xsl:template match="UptodateChunks">
  <xsl:text>&#10;</xsl:text>
  <!-- xml:lang is placed on the note literal result element so that it reaches the output;
       as an attribute of xsl:text it was ignored by the processor and silently dropped. -->
  <note xml:lang="en">Uptodate chunks</note>
  <xsl:text>&#10;</xsl:text>
  <xsl:apply-templates select="Chunk">
    <xsl:sort select="@key"/>
  </xsl:apply-templates>
</xsl:template>
90
<!-- Writes a heading note for the group of chunks that still need translating, then
     processes every Chunk child of the matched element, sorted on its key attribute. -->
<xsl:template match="ChunksRequiringTranslation">
  <xsl:text>&#10;</xsl:text>
  <!-- xml:lang is placed on the note literal result element so that it reaches the output;
       as an attribute of xsl:text it was ignored by the processor and silently dropped. -->
  <note xml:lang="en">Chunks requiring translating</note>
  <xsl:text>&#10;</xsl:text>
  <xsl:apply-templates select="Chunk">
    <xsl:sort select="@key"/>
  </xsl:apply-templates>
</xsl:template>
98
<!-- Writes a heading note for the group of chunks whose translation needs updating, then
     processes every Chunk child of the matched element, sorted on its key attribute. -->
<xsl:template match="ChunksRequiringUpdating">
  <xsl:text>&#10;</xsl:text>
  <!-- xml:lang is placed on the note literal result element so that it reaches the output;
       as an attribute of xsl:text it was ignored by the processor and silently dropped. -->
  <note xml:lang="en">Chunks requiring updating</note>
  <xsl:text>&#10;</xsl:text>
  <xsl:apply-templates select="Chunk">
    <xsl:sort select="@key"/>
  </xsl:apply-templates>
</xsl:template>
106
<!-- Writes a heading note for the undivided list of all chunks, then processes every
     Chunk child of the matched element, sorted on its key attribute. -->
<xsl:template match="Chunks">
  <xsl:text>&#10;</xsl:text>
  <!-- xml:lang is placed on the note literal result element so that it reaches the output;
       as an attribute of xsl:text it was ignored by the processor and silently dropped. -->
  <note xml:lang="en">All chunks</note>
  <xsl:text>&#10;</xsl:text>
  <xsl:apply-templates select="Chunk">
    <xsl:sort select="@key"/>
  </xsl:apply-templates>
</xsl:template>
114
115
<!-- Builds one TMX translation unit (tu) for the matched Chunk.
     The tu holds two tuv variants: the first is labelled with $sourcelang and filled from the
     SourceFileText children, the second is labelled with $targetlang and filled from the
     TargetFileText children. Each tuv starts with a prop that records the chunk's key. -->
<xsl:template match="Chunk">
  <tu>
    <xsl:text>&#10;</xsl:text>

    <!-- source language variant -->
    <tuv xml:lang="{$sourcelang}">
      <prop type="source"><xsl:value-of select="@key"/></prop>
      <xsl:text>&#10;</xsl:text>
      <seg>
        <xsl:for-each select="SourceFileText">
          <xsl:call-template name="gti-escaped-chunk-text"/>
        </xsl:for-each>
      </seg>
      <xsl:text>&#10;</xsl:text>
    </tuv>
    <xsl:text>&#10;</xsl:text>

    <!-- target language variant -->
    <tuv xml:lang="{$targetlang}">
      <prop type="target"><xsl:value-of select="@key"/></prop>
      <xsl:text>&#10;</xsl:text>
      <seg>
        <xsl:for-each select="TargetFileText">
          <xsl:call-template name="gti-escaped-chunk-text"/>
        </xsl:for-each>
      </seg>
      <xsl:text>&#10;</xsl:text>
    </tuv>
    <xsl:text>&#10;</xsl:text>
  </tu>
  <xsl:text>&#10;&#10;</xsl:text>
</xsl:template>

<!-- Helper shared by both tuv variants of the Chunk template. It works on the current
     context node (a SourceFileText or TargetFileText element) and writes that node's
     text after a fixed chain of ApplyXSLT extension calls, with output escaping disabled.
     Steps, in order:
       1. getChunkString: per the original author's note, this removes a collection-specific
          attribute-like prefix found in paperspast.dm (implementation not visible here).
       2. every "\n" match is replaced by a textual line feed character reference followed
          by a real line feed;
       3. every less-than sign is replaced by its entity text;
       4. every greater-than sign is replaced by its entity text;
       5. every ampersand is replaced by its entity text.
     NOTE(review): because ampersands are handled last, the ampersands introduced by
     steps 2 to 4 appear to be escaped a second time. The order is kept exactly as it was,
     since this may be deliberate for the GTT import; confirm before changing. -->
<xsl:template name="gti-escaped-chunk-text">
  <xsl:variable name="chunkText" select='java:org.nzdl.gsdl.ApplyXSLT.getChunkString(.)'/>
  <xsl:variable name="newlinesMarked" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($chunkText, "\n", "&amp;#10;&#10;")'/>
  <xsl:variable name="ltReplaced" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($newlinesMarked, "&#60;", "&amp;lt;")'/>
  <xsl:variable name="gtReplaced" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($ltReplaced, "&#62;", "&amp;gt;")'/>
  <xsl:variable name="ampReplaced" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($gtReplaced, "&#38;", "&amp;amp;")'/>
  <xsl:value-of select='$ampReplaced' disable-output-escaping="yes"/>
</xsl:template>
146
147  <!--<xsl:template match="*">Do nothing for all other templates matched</xsl:template>-->
148
149</xsl:stylesheet>
Note: See TracBrowser for help on using the browser.