Ignore:
Timestamp:
2008-07-29T13:07:59+12:00 (16 years ago)
Author:
ak19
Message:
  1. Base64 encoded gsdlsourcefilename to preserve original filename. 2. Both subroutines set_source_filename and set_converted_filename now call set_utf8_metadata_element. 3. Subroutine add_utf8_metadata checks to see that the meta is utf8 (or else tries to make it utf8) before adding it in.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/doc.pm

    r15894 r16578  
    3939use ghtml;
    4040use File::stat;
     41use MIME::Base64;
    4142##use hashdoc;
    4243use docprint;
     
    145146    my ($source_filename) = @_;
    146147
    147     $self->set_metadata_element ($self->get_top_section(),
     148    # Since the gsdlsourcefilename element goes into the doc.xml it has
     149    # to be utf8. However, it should also *represent* the source filename
     150    # (in the import directory) which may not be utf8 at all.
     151    # For instance, this meta element (gsdlsourcefilename) may be used
     152    # by other applications that parse doc.xml in order to locate the
     153    # original source file. Therefore, the solution is to encode the real
     154    # filename in Base64, which is a binary-to-text encoding, meaning
     155    # that the resulting string is plain ASCII (and hence valid utf8).
     156    # See also MIME#Content-Transfer-Encoding in Wiki.
     157   
     158    print STDERR "******Base64 encoding the non-utf8 gsdl_source_filename $source_filename ";
     159   
     160    # remove "import/" from path, base 64 encode just the source_filename
     161    # (remove trailing spaces and newlines) and prefix "import/" again.
     162    my $import_dir = "import";
     163    my $srcfilename = $source_filename;
     164    $srcfilename =~ s/^$import_dir(?:\\|\/)+(.*)$/$1/;
     165   
     166    print STDERR "-> $srcfilename -> ";
     167    $srcfilename = &MIME::Base64::encode_base64($srcfilename);
     168    $srcfilename =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
     169       
     170    $source_filename = &util::filename_cat($import_dir, $srcfilename);
     171    print STDERR "$source_filename\n";
     172   
     173    $self->set_utf8_metadata_element ($self->get_top_section(),
    148174                 "gsdlsourcefilename",
    149175                 $source_filename);
     
    154180    my ($converted_filename) = @_;
    155181
    156     $self->set_metadata_element ($self->get_top_section(),
     182    # we know the converted filename is utf8
     183    $self->set_utf8_metadata_element ($self->get_top_section(),
    157184                 "gsdlconvertedfilename",
    158185                 $converted_filename);
     
    826853    #print STDERR "###$field=$value\n";
    827854    # double check that the value is utf-8
    828     if (unicode::ensure_utf8(\$value)) {
    829     print STDERR "doc::add_utf8_metadata: warning: '$field' wasn't utf8\n";
     855    if (!&unicode::check_is_utf8($value)) {
     856    print STDERR "doc::add_utf8_metadata - warning: '$field''s value $value wasn't utf8. ";
     857    &unicode::ensure_utf8(\$value);
     858    print STDERR "Tried converting to utf8: $value\n";
    830859    }
    831860
Note: See TracChangeset for help on using the changeset viewer.