Changeset 30719

Show
Ignore:
Timestamp:
16.08.2016 21:11:53 (3 years ago)
Author:
ak19
Message:

Redoing the work of commits 30681 and 30687 to handle escaped colons (or escaped equals-sign separators) in the chunk_keys of properties files used by GTI. Kathy recently introduced escaped colons into the metadata_names.properties files, but the GTI code did not yet support them.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/gti.pl

    r30716 r30719  
    679679    foreach my $chunk_key (keys(%source_file_key_to_submission_mapping)) { 
    680680 
    681     # Kathy introduced escaped colons ("\:") into chunk keys in properties files (greenstone3/metadata_names), 
    682     # but they're not escaped in the submitted XML versions. So have to carefully compare for submissions: 
    683     my $escaped_chunk_key = $chunk_key; 
    684     $escaped_chunk_key =~ s/:/\\:/g; 
     681    # Kathy introduced escaped colons ("\:") into chunk keys in properties files (greenstone3/metadata_names),   
     682    # but they're not escaped in the submitted XML versions, nor are they escaped in memory (in the $chunk_key) 
    685683 
    686684        # Make sure the submitted chunk still exists in the source file 
    687         if (!defined($source_file_key_to_text_mapping{$escaped_chunk_key})) { 
     685        if (!defined($source_file_key_to_text_mapping{$chunk_key})) { 
    688686            &log_message("Warning: Source chunk $chunk_key no longer exists (ignoring submission)."); 
    689687            delete $source_file_key_to_submission_mapping{$chunk_key}; 
     
    693691         
    694692        # Make sure the submitted source chunk matches the source file chunk 
    695         if ($source_file_key_to_submission_mapping{$chunk_key} ne &unmake_text_xml_safe($source_file_key_to_text_mapping{$escaped_chunk_key})) { 
     693        if ($source_file_key_to_submission_mapping{$chunk_key} ne &unmake_text_xml_safe($source_file_key_to_text_mapping{$chunk_key})) { 
    696694        #if (&unmake_text_xml_safe($source_file_key_to_submission_mapping{$chunk_key}) ne &unmake_text_xml_safe($source_file_key_to_text_mapping{$chunk_key})) { 
    697695                    #print STDERR "**** $source_file_key_to_submission_mapping{$chunk_key}\n"; 
    698696                #print STDERR "**** " . &unmake_text_xml_safe($source_file_key_to_text_mapping{$chunk_key}) ."\n"; 
    699697 
    700             &log_message("Warning: Source chunk $escaped_chunk_key has changed (ignoring submission)."); 
     698            &log_message("Warning: Source chunk $chunk_key has changed (ignoring submission)."); 
    701699            &log_message("Submission source: |$source_file_key_to_submission_mapping{$chunk_key}|"); 
    702             &log_message("      Source text: |$source_file_key_to_text_mapping{$escaped_chunk_key}|"); 
     700            &log_message("      Source text: |$source_file_key_to_text_mapping{$chunk_key}|"); 
    703701            delete $source_file_key_to_submission_mapping{$chunk_key}; 
    704702            delete $target_file_key_to_submission_mapping{$chunk_key}; 
     
    710708    foreach my $chunk_key (keys(%target_file_key_to_submission_mapping)) { 
    711709        # Only apply the submission if it is a change, unless -force_submission has been specified 
    712     my $escaped_chunk_key = $chunk_key; 
    713     $escaped_chunk_key =~ s/:/\\:/g; 
    714         if ($force_submission_flag || !defined($target_file_key_to_text_mapping{$escaped_chunk_key}) || $target_file_key_to_submission_mapping{$chunk_key} ne $target_file_key_to_text_mapping{$escaped_chunk_key}) { 
    715             $target_file_key_to_text_mapping{$escaped_chunk_key} = $target_file_key_to_submission_mapping{$chunk_key}; 
    716             $target_file_key_to_gti_comment_mapping{$escaped_chunk_key} = "Updated $submission_date by $submitter_username"; 
     710        if ($force_submission_flag || !defined($target_file_key_to_text_mapping{$chunk_key}) || $target_file_key_to_submission_mapping{$chunk_key} ne $target_file_key_to_text_mapping{$chunk_key}) { 
     711            $target_file_key_to_text_mapping{$chunk_key} = $target_file_key_to_submission_mapping{$chunk_key}; 
     712            $target_file_key_to_gti_comment_mapping{$chunk_key} = "Updated $submission_date by $submitter_username"; 
    717713        } 
    718714    } 
     
    16211617        $line =~ s/(\s*)$//;  # Remove any nasty whitespace, carriage returns etc. 
    16221618         
    1623         $line =~ s/\\:/ESCAPEDCOLONTEMP/g; # an escaped colon "\:" should only occur in the chunk_key portion of the line 
    1624  
    1625         # Line contains a dictionary string 
    1626         if ($line =~ /^(\S+?)[:|=](.*)$/) { 
     1619        # A property line has a colon/equals sign as separator that is NOT escaped with a backslash. Both keys and values 
     1620        # can contain the colon or = sign, but in the key such a char is always escaped. Unfortunately, they've not always been 
     1621        # escaped in the values, so we take the leftmost occurrence by not doing a greedy match (the ? makes the match non-greedy). 
     1622        # So find the first :/= char not preceded by \. That will be the true separator of a chunk_key and its value chunk_text. 
     1623 
     1624        if ($line =~ m/^(\S*?[^\\])[:|=](.*)$/) { #if ($line =~ /^(\S+?)[:|=](.*)$/) { 
     1625            # Line contains a dictionary string 
     1626 
     1627            # Unused but useful: http://stackoverflow.com/questions/87380/how-can-i-find-the-location-of-a-regex-match-in-perl 
     1628            # http://perldoc.perl.org/perlvar.html 
     1629             
    16271630            $chunk_key = $1; 
    1628  
    1629             # an escaped colon "\:" should only occur in the chunk_key portion of the line, put it back in, escaped as before 
    1630             if($chunk_key =~ m/ESCAPEDCOLONTEMP/) { 
    1631             $chunk_key =~ s/ESCAPEDCOLONTEMP/\\:/g; 
    1632             } 
     1631            # Remove the escaping of any :/= property separator from the chunk_key in memory, 
     1632            # to make comparison with its unescaped version during submissions easier. The key is written out with escaping again. 
     1633            $chunk_key =~ s/\\([:=])/$1/g;        
    16331634             
    16341635            $startindex = $i; 
     
    16541655    my ($chunk_text) = @_; 
    16551656     
    1656     $chunk_text =~ s/\\:/ESCAPEDCOLONTEMP/g; # an escaped colon "\:" should only occur in the chunk_key portion of the line. 
    1657     # But it seems to occur in chunk text too now (before http:\\) in dictionary.properties. 
    1658  
    1659     # Simple: just remove string key     
    1660     $chunk_text =~ s/^(\S+?)[:|=](\s*)//s; 
    1661  
    1662     if($chunk_text =~ m/ESCAPEDCOLONTEMP/) { # put back the escaped colons 
    1663     $chunk_text =~ s/ESCAPEDCOLONTEMP/\\:/g; 
    1664     } 
     1657    # Simple: just remove string key.  
     1658    # But key can contain an escaped separator (\: or \=). 
     1659    # So just as in the previous subroutine, find the first (leftmost) : or = char not preceded by \.  
     1660    # That will be the true separator of a chunk_key and its value chunk_text 
     1661    $chunk_text =~ s/^(\S*?[^\\])[:|=](\s*)//s; # $chunk_text =~ s/^(\S+)[:|=](\s*)//s; 
    16651662 
    16661663    $chunk_text =~ s/(\s*)$//s;  # Remove any nasty whitespace, carriage returns etc. 
     
    16781675    if ($chunk_text =~ /\#\s+(Updated\s+\d?\d-\D\D\D-\d\d\d\d.*)\s*$/i) { 
    16791676        return $1; 
    1680 } 
    1681  
    1682 return undef; 
     1677    } 
     1678 
     1679    return undef; 
    16831680} 
    16841681 
     
    17311728        my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || ""; 
    17321729         
     1730        # make sure any : or = sign in the chunk key is escaped again (with \) when written out  
     1731        # since the key-value separator in a property resource bundle file is : or = 
     1732        my $escaped_chunk_key = $chunk_key; 
     1733        $escaped_chunk_key =~ s/(:|=)/\\$1/g; #$escaped_chunk_key =~ s/([^\\])(:|=)/\\$1$2/g; 
     1734         
    17331735        # If no translation exists for this chunk, show this, and move on 
    17341736        if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") { 
    1735             print TARGET_FILE "# -- Missing translation: $chunk_key\n"; 
     1737            print TARGET_FILE "# -- Missing translation: $escaped_chunk_key\n"; 
    17361738            next; 
    17371739        } 
    1738          
    1739         print TARGET_FILE "$chunk_key:$target_file_chunk_text"; 
     1740 
     1741        print TARGET_FILE "$escaped_chunk_key:$target_file_chunk_text"; 
    17401742        if ($target_file_key_to_gti_comment_mapping->{$chunk_key}) { 
    17411743            print TARGET_FILE "  # " . $target_file_key_to_gti_comment_mapping->{$chunk_key}; 
     
    22022204            my $target_file_chunk_text = $target_file_key_to_text_mapping->{$global_chunk_key} || ""; 
    22032205             
     2206            # make sure any : or = sign in the chunk key is escaped again (with \) when written out 
     2207            # since the key-value separator in a property resource bundle file is : or = 
     2208            my $escaped_chunk_key = $chunk_key; 
     2209            $escaped_chunk_key =~ s/(:|=)/\\$1/g; #$escaped_chunk_key =~ s/([^\\])(:|=)/\\$1$2/g; 
     2210 
    22042211            # If no translation exists for this chunk, show this, and move on 
    22052212            if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") { 
    2206                 print TARGET_FILE "# -- Missing translation: $chunk_key\n"; 
     2213                print TARGET_FILE "# -- Missing translation: $escaped_chunk_key\n"; 
    22072214                next; 
    22082215            } 
    22092216             
    2210             print TARGET_FILE "$chunk_key:$target_file_chunk_text"; 
     2217            print TARGET_FILE "$escaped_chunk_key:$target_file_chunk_text"; 
    22112218            if ($target_file_key_to_gti_comment_mapping->{$global_chunk_key}) { 
    22122219                print TARGET_FILE "  # " . $target_file_key_to_gti_comment_mapping->{$global_chunk_key};