root/other-projects/gti/gti-tmx-to-spreadsheet.xsl @ 25287

Revision 25287, 4.6 KB (checked in by ak19, 8 years ago)

1. Added two new XSLT files: both generate the spreadsheet .txt files necessary from the chunks of strings that still require translation work. 2. Added extensive comments into each file on how to use the 4 recently added XSLT files.

Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:java="http://xml.apache.org/xslt/java" xmlns:tmx="http://www.lisa.org/tmx14">
3  <!-- For character entities: http://www.w3.org/MarkUp/html3/latin1.html -->
4  <!-- The necessity for using xml namespaces all over and matching on namespaced element names http://www.stylusstudio.com/xsllist/200302/post70120.html -->
5
6   <!-- HOW AND WHEN TO USE THIS FILE
7       1. Generate an xml file containing the chunks requiring work.
8       GS2/bin/script> perl -S gti.pl get-first-n-chunks-requiring-work mi coredm 1000 > ../../macros/maori/mi-core.xml
9
10       2. Generate a TMX file from that XML file as follows:
11       GS2/bin/script> java -cp /research/ak19/gs2-svn/bin/java/ApplyXSLT.jar org.nzdl.gsdl.ApplyXSLT -x ../../macros/maori/mi-core.xml -t ../script/gti-generate-tmx-xml.xsl -l mi > ../../maori/core-mi.tmx
12
13       3. Apply this XSLT to that TMX file to obtain a unicode text file containing comma-separated values (a spreadsheet).
14       NOTE: Make sure the output is a *.txt file if you wish to open it in Excel without losing the unicode
15       (when opened with .csv extension, the unicode is not preserved).
16
17       GS2/bin/script> java -cp /research/ak19/gs2-svn/bin/java/ApplyXSLT.jar org.nzdl.gsdl.ApplyXSLT -x ../../maori/core-mi.tmx -t ../script/gti-tmx-to-spreadsheet.xsl > ../../maori/core-mi-tmx2spreadsheet.txt
18
19       4. Translators wishing to read this text file into Excel, need to first open Excel. Then go to File > Open,
20       and choose to open the unicode .txt file containing the comma-separated values.
21
22       - A wizard will appear, allowing users to open this .txt file as a proper spreadsheet.
23       In the first frame of this dialog, you need to specify:
24       a. on the left that the file is "delimited"
25       b. in the drop down on the right, select unicode (UTF-8 or UTF-16)
26       c. Click next
27       In the second frame of the dialog, select "comma" as the delimiter. Click Finish to open the spreadsheet data.
28       
29       5. When translators have finished working on the file, they can save it simply as an Excel spreadsheet .xls file
30       (File > Save As > Excel 2003 spreadsheet) and mail it back to Greenstone.
31       Translators are advised against saving it as a .txt file from Excel 2003, since that doesn't seem to preserve the
32       comma delimiters.
33    -->
34
35  <xsl:output method="text" encoding="UTF-16"/> <!-- When we save as txt from Excel, we choose UTF-16 too -->
36
37  <xsl:template match="tmx:tmx">
38    <xsl:apply-templates select="tmx:body"/>
39  </xsl:template>
40
41  <xsl:template match="tmx:body">
42    <xsl:text>Source key&#44;Source text&#44;Target key&#44;Target text</xsl:text><!--column headings-->
43    <xsl:text>&#10;</xsl:text> <!--newline--> 
44    <xsl:apply-templates select="tmx:tu"/>
45  </xsl:template>
46
47  <xsl:template match="tmx:tu">
48    <xsl:for-each select="tmx:tuv">
49      <xsl:if test="tmx:prop[@type='source']">
50    <xsl:text>source::</xsl:text>
51    <xsl:value-of select="tmx:prop"/>
52    <xsl:text>&#44;</xsl:text><!-- comma -->
53    <xsl:variable name="tempText1"><xsl:value-of select="tmx:seg"/></xsl:variable>
54    <xsl:variable name="tempText2" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText1, "&amp;#10;&#10;", "&amp;#10;")'/>
55    <xsl:variable name="tempText3" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText2, "&amp;lt;", "&#60;")'/>
56    <xsl:variable name="tempText4" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText3, "&amp;gt;", "&#62;")'/>
57    <xsl:variable name="escapedText" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText4, "&amp;amp;", "&#38;")'/>
58    <xsl:value-of select='$escapedText' disable-output-escaping="yes"/>
59    <xsl:text>&#44;</xsl:text><!-- comma -->
60    </xsl:if>
61    </xsl:for-each>
62
63    <xsl:for-each select="tmx:tuv">
64      <xsl:if test="tmx:prop[@type='target']">
65    <xsl:text>target::</xsl:text>
66    <xsl:value-of select="tmx:prop"/>
67    <xsl:text>&#44;</xsl:text><!-- comma -->
68    <xsl:variable name="tempText1"><xsl:value-of select="tmx:seg"/></xsl:variable>
69    <xsl:variable name="tempText2" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText1, "&amp;#10;&#10;", "&amp;#10;")'/>
70    <xsl:variable name="tempText3" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText2, "&amp;lt;", "&#60;")'/>
71    <xsl:variable name="tempText4" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText3, "&amp;gt;", "&#62;")'/>
72    <xsl:variable name="escapedText" select='java:org.nzdl.gsdl.ApplyXSLT.replaceAll($tempText4, "&amp;amp;", "&#38;")'/>
73    <xsl:value-of select='$escapedText' disable-output-escaping="yes"/>
74    <xsl:text>&#10;</xsl:text><!-- newline -->
75      </xsl:if>
76    </xsl:for-each>
77  </xsl:template>
78
79</xsl:stylesheet>
Note: See TracBrowser for help on using the browser.