Changeset 25285

Show
Ignore:
Timestamp:
23.03.2012 16:46:29 (7 years ago)
Author:
ak19
Message:

Updated to deal with paperspast.dm: added a new module entry to gti.pl for it. Because getting chunks from the paperspast.dm file yields attribute-like values in the source and target strings, ApplyXSLT.java now has functions to remove and to retrieve them. Finally, the function that removes these attribute-like values from the source and target strings is called in the gti-generate-tmx-xml.xslt file.

Location:
main/trunk/greenstone2
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/gti.pl

    r25249 r25285  
    5454    'target_file' => "macros/{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}2.dm" }, 
    5555 
     56{ 'key' => "paperspastdm", 
     57    'file_type' => "macrofile", 
     58    'source_file' => "macros/paperspast-english.dm", 
     59    'target_file' => "macros/paperspast-{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}.dm" }, 
     60 
    5661# GLI dictionary 
    5762{ 'key' => "glidict", 
  • main/trunk/greenstone2/build-src/src/java/org/nzdl/gsdl/ApplyXSLT.java

    r25241 r25285  
    336336  } 
    337337 
     338    // Necessary for paperspast.dm, but can be used generally.  
     339    // The get-chunks cmd of gti.pl perl script when run over paperspast.dm returns XML with source and target lines  
     340    // like: [c=paperspast] {All newspapers} for source and [c=paperspast,l=mi] {Niupepa katoa} for target 
     341    // This function returns just the 'string' portion of the chunk of data: e.g 'All newspapers' and 'Niupepa katoa' 
     342    static public String getChunkString(String target_file_text) 
     343    { 
     344    int startindex = target_file_text.indexOf("["); 
     345    if(startindex != 0) { 
     346        return target_file_text; 
     347    } // to test that the input requires processing 
     348 
     349    // else 
     350    startindex = target_file_text.indexOf("{"); 
     351    int endindex = target_file_text.lastIndexOf("}"); 
     352    if(startindex != -1 && endindex != -1) { 
     353        return target_file_text.substring(startindex+1, endindex); // skips { and } 
     354    } else { 
     355        return target_file_text; 
     356    } 
     357     
     358    } 
     359 
     360    // Necessary for paperspast.dm, but can be used generally.  
     361    // The get-chunks cmd of gti.pl perl script when run over paperspast.dm returns XML with source and target lines  
     362    // like: [c=paperspast] {All newspapers} for source and [c=paperspast,l=mi] {Niupepa katoa} for target 
     363    // This function returns just the 'attribute' portion of the chunk of data: e.g 'c=paperspast' and 'c=paperspast,l=mi' 
     364    static public String getChunkAttr(String target_file_text) 
     365    { 
     366    int startindex = target_file_text.indexOf("["); 
     367    if(startindex != 0) { 
     368        return target_file_text; 
     369    } // to test that the input requires processing 
     370 
     371    // else 
     372    startindex = target_file_text.indexOf("{"); 
     373    int endindex = target_file_text.lastIndexOf("}"); 
     374    if(startindex != -1 && endindex != -1) { 
     375        endindex = target_file_text.lastIndexOf("]", startindex); // look for ] preceding the { 
     376        if(endindex > 1) { //if(endindex != -1) { 
     377                       // so there's something to substring between [ and ]                         
     378        return target_file_text.substring(1, endindex).trim(); // skips [ and ] 
     379        } 
     380    }  
     381    return target_file_text; 
     382    } 
    338383 
    339384  public static void main(String[] args)