Changeset 25285


Ignore:
Timestamp:
2012-03-23T16:46:29+13:00 (12 years ago)
Author:
ak19
Message:

Updated to deal with paperspast.dm: added a new module into gti.pl for this. Because getting chunks from the paperspast.dm file results in attribute-like values in the source and target strings, there are now functions in ApplyXSLT.java to remove and retrieve these. Finally, the function to remove these attribute-like values from the source and target strings is called in the gti-generate-tmx-xml.xslt file.

Location:
main/trunk/greenstone2
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/gti.pl

    r25249 r25285  
    5454    'target_file' => "macros/{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}2.dm" },
    5555
     56{ 'key' => "paperspastdm",
     57    'file_type' => "macrofile",
     58    'source_file' => "macros/paperspast-english.dm",
     59    'target_file' => "macros/paperspast-{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}.dm" },
     60
    5661# GLI dictionary
    5762{ 'key' => "glidict",
  • main/trunk/greenstone2/build-src/src/java/org/nzdl/gsdl/ApplyXSLT.java

    r25241 r25285  
    336336  }
    337337
     338    // Necessary for paperspast.dm, but can be used generally.
     339    // The get-chunks cmd of gti.pl perl script when run over paperspast.dm returns XML with source and target lines
     340    // like: [c=paperspast] {All newspapers} for source and [c=paperspast,l=mi] {Niupepa katoa} for target
     341    // This function returns just the 'string' portion of the chunk of data: e.g 'All newspapers' and 'Niupepa katoa'
     342    static public String getChunkString(String target_file_text)
     343    {
     344    int startindex = target_file_text.indexOf("[");
     345    if(startindex != 0) {
     346        return target_file_text;
     347    } // to test that the input requires processing
     348
     349    // else
     350    startindex = target_file_text.indexOf("{");
     351    int endindex = target_file_text.lastIndexOf("}");
     352    if(startindex != -1 && endindex != -1) {
     353        return target_file_text.substring(startindex+1, endindex); // skips { and }
     354    } else {
     355        return target_file_text;
     356    }
     357   
     358    }
     359
     360    // Necessary for paperspast.dm, but can be used generally.
     361    // The get-chunks cmd of gti.pl perl script when run over paperspast.dm returns XML with source and target lines
     362    // like: [c=paperspast] {All newspapers} for source and [c=paperspast,l=mi] {Niupepa katoa} for target
     363    // This function returns just the 'attribute' portion of the chunk of data: e.g 'c=paperspast' and 'c=paperspast,l=mi'
     364    static public String getChunkAttr(String target_file_text)
     365    {
     366    int startindex = target_file_text.indexOf("[");
     367    if(startindex != 0) {
     368        return target_file_text;
     369    } // to test that the input requires processing
     370
     371    // else
     372    startindex = target_file_text.indexOf("{");
     373    int endindex = target_file_text.lastIndexOf("}");
     374    if(startindex != -1 && endindex != -1) {
     375        endindex = target_file_text.lastIndexOf("]", startindex); // look for ] preceding the {
     376        if(endindex > 1) { //if(endindex != -1) {
     377                       // so there's something to substring between [ and ]                       
     378        return target_file_text.substring(1, endindex).trim(); // skips [ and ]
     379        }
     380    }
     381    return target_file_text;
     382    }
    338383
    339384  public static void main(String[] args)
Note: See TracChangeset for help on using the changeset viewer.