Changeset 16578

Show
Ignore:
Timestamp:
29.07.2008 13:07:59 (11 years ago)
Author:
ak19
Message:

1. Base64 encoded gsdlsourcefilename to preserve original filename. 2. Both subroutines set_source_filename and set_converted_filename now call set_utf8_metadata_element. 3. Subroutine add_utf8_metadata checks to see that the meta is utf8 (or else tries to make it utf8) before adding it in.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/doc.pm

    r15894 r16578  
    3939use ghtml; 
    4040use File::stat; 
     41use MIME::Base64; 
    4142##use hashdoc; 
    4243use docprint; 
     
    145146    my ($source_filename) = @_; 
    146147 
    147     $self->set_metadata_element ($self->get_top_section(),  
     148    # Since the gsdlsourcefilename element goes into the doc.xml it has  
     149    # to be utf8. However, it should also *represent* the source filename  
     150    # (in the import directory) which may not be utf8 at all.  
     151    # For instance, this meta element (gsdlsourcefilename) may be 
     152    # used by other applications that parse doc.xml in order to locate 
     153    # the source file. Therefore, the solution is to encode the real 
     154    # filename into Base64 encoding which is a binary-to-text encoding,  
     155    # meaning that the resulting string is ASCII (utf8). See also  
     156    # MIME#Content-Transfer-Encoding in Wiki. 
     157     
     158    print STDERR "******Base64 encoding the non-utf8 gsdl_source_filename $source_filename "; 
     159     
     160    # remove "import/" from path, base 64 encode just the source_filename  
     161    # (remove trailing spaces and newlines) and prefix "import/" again.  
     162    my $import_dir = "import"; 
     163    my $srcfilename = $source_filename; 
     164    $srcfilename =~ s/^$import_dir(?:\\|\/)+(.*)$/$1/; 
     165     
     166    print STDERR "-> $srcfilename -> "; 
     167    $srcfilename = &MIME::Base64::encode_base64($srcfilename); 
     168    $srcfilename =~ s/\s*//sg; # encode_base64 inserts a newline after every 76 output characters (and at the end); strip them all 
     169         
     170    $source_filename = &util::filename_cat($import_dir, $srcfilename); 
     171    print STDERR "$source_filename\n"; 
     172     
     173    $self->set_utf8_metadata_element ($self->get_top_section(),  
    148174                 "gsdlsourcefilename",  
    149175                 $source_filename); 
     
    154180    my ($converted_filename) = @_; 
    155181 
    156     $self->set_metadata_element ($self->get_top_section(),  
     182    # we know the converted filename is utf8 
     183    $self->set_utf8_metadata_element ($self->get_top_section(),  
    157184                 "gsdlconvertedfilename",  
    158185                 $converted_filename); 
     
    826853    #print STDERR "###$field=$value\n"; 
    827854    # double check that the value is utf-8 
    828     if (unicode::ensure_utf8(\$value)) { 
    829     print STDERR "doc::add_utf8_metadata: warning: '$field' wasn't utf8\n"; 
     855    if (!&unicode::check_is_utf8($value)) { 
     856    print STDERR "doc::add_utf8_metadata - warning: '$field''s value $value wasn't utf8. "; 
     857    &unicode::ensure_utf8(\$value); 
     858    print STDERR "Tried converting to utf8: $value\n"; 
    830859    } 
    831860