Changeset 30719 for main


Ignore:
Timestamp:
2016-08-16T21:11:53+12:00 (8 years ago)
Author:
ak19
Message:

Redoing the work for commits 30681 and 30687 to handle escaped colons (or escaped equals-sign separators) in the chunk_keys of properties files used by GTI. Kathy recently introduced escaped colons into the metadata_names.properties files, but the GTI code did not yet support this.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/gti.pl

    r30716 r30719  
    679679    foreach my $chunk_key (keys(%source_file_key_to_submission_mapping)) {
    680680
    681     # Kathy introduced escaped colons ("\:") into chunk keys in properties files (greenstone3/metadata_names),
    682     # but they're not escaped in the submitted XML versions. So have to carefully compare for submissions:
    683     my $escaped_chunk_key = $chunk_key;
    684     $escaped_chunk_key =~ s/:/\\:/g;
     681    # Kathy introduced escaped colons ("\:") into chunk keys in properties files (greenstone3/metadata_names), 
     682    # but they're not escaped in the submitted XML versions, nor are they escaped in memory (in the $chunk_key)
    685683
    686684        # Make sure the submitted chunk still exists in the source file
    687         if (!defined($source_file_key_to_text_mapping{$escaped_chunk_key})) {
     685        if (!defined($source_file_key_to_text_mapping{$chunk_key})) {
    688686            &log_message("Warning: Source chunk $chunk_key no longer exists (ignoring submission).");
    689687            delete $source_file_key_to_submission_mapping{$chunk_key};
     
    693691       
    694692        # Make sure the submitted source chunk matches the source file chunk
    695         if ($source_file_key_to_submission_mapping{$chunk_key} ne &unmake_text_xml_safe($source_file_key_to_text_mapping{$escaped_chunk_key})) {
     693        if ($source_file_key_to_submission_mapping{$chunk_key} ne &unmake_text_xml_safe($source_file_key_to_text_mapping{$chunk_key})) {
    696694        #if (&unmake_text_xml_safe($source_file_key_to_submission_mapping{$chunk_key}) ne &unmake_text_xml_safe($source_file_key_to_text_mapping{$chunk_key})) {
    697695                    #print STDERR "**** $source_file_key_to_submission_mapping{$chunk_key}\n";
    698696                #print STDERR "**** " . &unmake_text_xml_safe($source_file_key_to_text_mapping{$chunk_key}) ."\n";
    699697
    700             &log_message("Warning: Source chunk $escaped_chunk_key has changed (ignoring submission).");
     698            &log_message("Warning: Source chunk $chunk_key has changed (ignoring submission).");
    701699            &log_message("Submission source: |$source_file_key_to_submission_mapping{$chunk_key}|");
    702             &log_message("      Source text: |$source_file_key_to_text_mapping{$escaped_chunk_key}|");
     700            &log_message("      Source text: |$source_file_key_to_text_mapping{$chunk_key}|");
    703701            delete $source_file_key_to_submission_mapping{$chunk_key};
    704702            delete $target_file_key_to_submission_mapping{$chunk_key};
     
    710708    foreach my $chunk_key (keys(%target_file_key_to_submission_mapping)) {
    711709        # Only apply the submission if it is a change, unless -force_submission has been specified
    712     my $escaped_chunk_key = $chunk_key;
    713     $escaped_chunk_key =~ s/:/\\:/g;
    714         if ($force_submission_flag || !defined($target_file_key_to_text_mapping{$escaped_chunk_key}) || $target_file_key_to_submission_mapping{$chunk_key} ne $target_file_key_to_text_mapping{$escaped_chunk_key}) {
    715             $target_file_key_to_text_mapping{$escaped_chunk_key} = $target_file_key_to_submission_mapping{$chunk_key};
    716             $target_file_key_to_gti_comment_mapping{$escaped_chunk_key} = "Updated $submission_date by $submitter_username";
     710        if ($force_submission_flag || !defined($target_file_key_to_text_mapping{$chunk_key}) || $target_file_key_to_submission_mapping{$chunk_key} ne $target_file_key_to_text_mapping{$chunk_key}) {
     711            $target_file_key_to_text_mapping{$chunk_key} = $target_file_key_to_submission_mapping{$chunk_key};
     712            $target_file_key_to_gti_comment_mapping{$chunk_key} = "Updated $submission_date by $submitter_username";
    717713        }
    718714    }
     
    16211617        $line =~ s/(\s*)$//;  # Remove any nasty whitespace, carriage returns etc.
    16221618       
    1623         $line =~ s/\\:/ESCAPEDCOLONTEMP/g; # an escaped colon "\:" should only occur in the chunk_key portion of the line
    1624 
    1625         # Line contains a dictionary string
    1626         if ($line =~ /^(\S+?)[:|=](.*)$/) {
     1619        # a property line has a colon/equals sign as separator that is NOT escaped with a backslash (both keys and values
     1620        # can use the colon or = sign). But in the key, such a char is always escaped. Unfortunately, they've not always been
     1621        # escaped in the values. So we get the left most occurrence by not doing a greedy match (use ? to not be greedy).
     1622        # So find the first :/= char not preceded by \. That will be the true separator of a chunk_key and its value chunk_text
     1623
     1624        if ($line =~ m/^(\S*?[^\\])[:|=](.*)$/) { #if ($line =~ /^(\S+?)[:|=](.*)$/) {
     1625            # Line contains a dictionary string
     1626
     1627            # Unused but useful: http://stackoverflow.com/questions/87380/how-can-i-find-the-location-of-a-regex-match-in-perl
     1628            # http://perldoc.perl.org/perlvar.html
     1629           
    16271630            $chunk_key = $1;
    1628 
    1629             # an escaped colon "\:" should only occur in the chunk_key portion of the line, put it back in, escaped as before
    1630             if($chunk_key =~ m/ESCAPEDCOLONTEMP/) {
    1631             $chunk_key =~ s/ESCAPEDCOLONTEMP/\\:/g;
    1632             }
     1631            # remove the escaping of any :/= property separator from the chunk_key in memory,
     1632            # to make comparison with its unescaped version during submissions easier. Will write out with escaping.
     1633            $chunk_key =~ s/\\([:=])/$1/g;       
    16331634           
    16341635            $startindex = $i;
     
    16541655    my ($chunk_text) = @_;
    16551656   
    1656     $chunk_text =~ s/\\:/ESCAPEDCOLONTEMP/g; # an escaped colon "\:" should only occur in the chunk_key portion of the line.
    1657     # But it seems to occur in chunk text too now (before http:\\) in dictionary.properties.
    1658 
    1659     # Simple: just remove string key   
    1660     $chunk_text =~ s/^(\S+?)[:|=](\s*)//s;
    1661 
    1662     if($chunk_text =~ m/ESCAPEDCOLONTEMP/) { # put back the escaped colons
    1663     $chunk_text =~ s/ESCAPEDCOLONTEMP/\\:/g;
    1664     }
     1657    # Simple: just remove string key.
     1658    # But key can contain an escaped separator (\: or \=).
     1659    # So just as in the previous subroutine, find the first (leftmost) : or = char not preceded by \.
     1660    # That will be the true separator of a chunk_key and its value chunk_text
     1661    $chunk_text =~ s/^(\S*?[^\\])[:|=](\s*)//s; # $chunk_text =~ s/^(\S+)[:|=](\s*)//s;
    16651662
    16661663    $chunk_text =~ s/(\s*)$//s;  # Remove any nasty whitespace, carriage returns etc.
     
    16781675    if ($chunk_text =~ /\#\s+(Updated\s+\d?\d-\D\D\D-\d\d\d\d.*)\s*$/i) {
    16791676        return $1;
    1680 }
    1681 
    1682 return undef;
     1677    }
     1678
     1679    return undef;
    16831680}
    16841681
     
    17311728        my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
    17321729       
     1730        # make sure any : or = sign in the chunk key is escaped again (with \) when written out
     1731        # since the key-value separator in a property resource bundle file is : or =
     1732        my $escaped_chunk_key = $chunk_key;
     1733        $escaped_chunk_key =~ s/(:|=)/\\$1/g; #$escaped_chunk_key =~ s/([^\\])(:|=)/\\$1$2/g;
     1734       
    17331735        # If no translation exists for this chunk, show this, and move on
    17341736        if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
    1735             print TARGET_FILE "# -- Missing translation: $chunk_key\n";
     1737            print TARGET_FILE "# -- Missing translation: $escaped_chunk_key\n";
    17361738            next;
    17371739        }
    1738        
    1739         print TARGET_FILE "$chunk_key:$target_file_chunk_text";
     1740
     1741        print TARGET_FILE "$escaped_chunk_key:$target_file_chunk_text";
    17401742        if ($target_file_key_to_gti_comment_mapping->{$chunk_key}) {
    17411743            print TARGET_FILE "  # " . $target_file_key_to_gti_comment_mapping->{$chunk_key};
     
    22022204            my $target_file_chunk_text = $target_file_key_to_text_mapping->{$global_chunk_key} || "";
    22032205           
     2206            # make sure any : or = sign in the chunk key is escaped again (with \) when written out
     2207            # since the key-value separator in a property resource bundle file is : or =
     2208            my $escaped_chunk_key = $chunk_key;
     2209            $escaped_chunk_key =~ s/(:|=)/\\$1/g; #$escaped_chunk_key =~ s/([^\\])(:|=)/\\$1$2/g;
     2210
    22042211            # If no translation exists for this chunk, show this, and move on
    22052212            if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
    2206                 print TARGET_FILE "# -- Missing translation: $chunk_key\n";
     2213                print TARGET_FILE "# -- Missing translation: $escaped_chunk_key\n";
    22072214                next;
    22082215            }
    22092216           
    2210             print TARGET_FILE "$chunk_key:$target_file_chunk_text";
     2217            print TARGET_FILE "$escaped_chunk_key:$target_file_chunk_text";
    22112218            if ($target_file_key_to_gti_comment_mapping->{$global_chunk_key}) {
    22122219                print TARGET_FILE "  # " . $target_file_key_to_gti_comment_mapping->{$global_chunk_key};
Note: See TracChangeset for help on using the changeset viewer.