Ignore:
Timestamp:
2004-10-05T12:39:06+13:00 (20 years ago)
Author:
jrm21
Message:

Use the unicode::ensure_utf8() function on the extracted text so that we can
be sure we don't generate bad archive files.

(BasPlug already does this when adding metadata.)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/PDFPlug.pm

    r8170 r8218  
     27  27
     28  28  use ConvertToPlug;
         29  use unicode;
     29  30
     30  31  sub BEGIN {
     
    139 140  my $outhandle=$self->{'outhandle'};
    140 141
        142  my $textref=$_[0];
        143
    141 144  if ($self->{'use_sections'}
    142 145  && $self->{'converted_to'} eq "HTML") {
    143 146
    144 147  print $outhandle "PDFPlug: Calculating sections...\n";
    145      my $textref=$_[0];
    146 148
    147 149  # we have "<a name=1></a>" etc for each page
     
    196 198  }
    197 199
        200  # turn any high bytes that aren't valid utf-8 into utf-8.
        201  unicode::ensure_utf8($textref);
        202
    198 203  print $outhandle "PDFPlug: passing $_[3] on to $self->{'converted_to'}Plug\n"
    199 204  if $self->{'verbosity'} > 1;
     
    215 220
    216 221  # Add NumPages metadata (we have "<a name=1>" etc for each page)
    217      my $textref = $_[0];
    218 222  my @pages = ($$textref =~ /\<a name=\d+\>/ig);
    219 223  $doc_obj->add_utf8_metadata($cursection, "NumPages", scalar(@pages));
Note: See TracChangeset for help on using the changeset viewer.