Ignore:
Timestamp:
2010-09-23T16:07:18+12:00 (14 years ago)
Author:
davidb
Message:

The old routine used to work on raw binary strings that just happened to be UTF-8 compliant. Now that strings are (in the Perl sense) Unicode aware, we *don't* want to check for them being valid UTF-8. In fact, things can start to go wrong if we do.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/doc.pm

    r22855 r22950  
    857857    my ($section, $field, $value) = @_;
    858858
     859#    my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
     860#    my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
     861#    print STDERR "** Calling method: $lcfilename:$cline $cpackage->$csubr\n";
     862
    859863    my $section_ptr = $self->_lookup_section($section);
    860864    if (!defined $section_ptr) {
     
    872876   
    873877    #print STDERR "###$field=$value\n";
     878
     879    # For now, suppress this check.  Given that text data read in is now
     880    # Unicode aware, then the following block of code can (ironically enough)
     881    # cause our unicode compliant string to be re-encoded (leading to
     882    # a double-encoded UTF-8 string, which we definitely don't want!).
     883   
     884
    874885    # double check that the value is utf-8
    875     if (!&unicode::check_is_utf8($value)) {
    876     print STDERR "doc::add_utf8_metadata - warning: '$field''s value $value wasn't utf8.";
    877     &unicode::ensure_utf8(\$value);
    878     print STDERR " Tried converting to utf8: $value\n";
    879     }
     886#    if (!&unicode::check_is_utf8($value)) {
     887#   print STDERR "doc::add_utf8_metadata - warning: '$field''s value $value wasn't utf8.";
     888#   &unicode::ensure_utf8(\$value);
     889#   print STDERR " Tried converting to utf8: $value\n";
     890#    }
    880891
    881892    push (@{$section_ptr->{'metadata'}}, [$field, $value]);
Note: See TracChangeset for help on using the changeset viewer.