Changeset 22950

Show
Ignore:
Timestamp:
23.09.2010 16:07:18 (9 years ago)
Author:
davidb
Message:

The old routine used to work on raw binary strings that just happened to be UTF-8 compliant. Now that strings are (in the Perl sense) Unicode aware, we *don't* want to check whether they are valid UTF-8. In fact, things can start to go wrong if we do.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/doc.pm

    r22855 r22950  
    857857    my ($section, $field, $value) = @_; 
    858858 
     859#    my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1); 
     860#    my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/); 
     861#    print STDERR "** Calling method: $lcfilename:$cline $cpackage->$csubr\n"; 
     862 
    859863    my $section_ptr = $self->_lookup_section($section); 
    860864    if (!defined $section_ptr) { 
     
    872876     
    873877    #print STDERR "###$field=$value\n"; 
     878 
     879    # For now, suppress this check.  Given that text data read in is now  
     880    # Unicode aware, the following block of code can (ironically enough)  
     881    # cause our unicode compliant string to be re-encoded (leading to 
     882    # a double-encoded UTF-8 string, which we definitely don't want!). 
     883     
     884 
    874885    # double check that the value is utf-8 
    875     if (!&unicode::check_is_utf8($value)) { 
    876     print STDERR "doc::add_utf8_metadata - warning: '$field''s value $value wasn't utf8."; 
    877     &unicode::ensure_utf8(\$value); 
    878     print STDERR " Tried converting to utf8: $value\n"; 
    879     } 
     886#    if (!&unicode::check_is_utf8($value)) { 
     887#   print STDERR "doc::add_utf8_metadata - warning: '$field''s value $value wasn't utf8."; 
     888#   &unicode::ensure_utf8(\$value); 
     889#   print STDERR " Tried converting to utf8: $value\n"; 
     890#    } 
    880891 
    881892    push (@{$section_ptr->{'metadata'}}, [$field, $value]);