Changeset 1220


Ignore:
Timestamp:
2000-06-21T10:16:40+12:00 (24 years ago)
Author:
sjboddie
Message:

Brought HTMLPlug up to date with the changes I made to BasPlug. HTMLPlug now uses
the new BasPlug::read_file function and accepts an -input_encoding
option (which may be set to Latin1, Arabic, gb, ascii, unicode, etc.).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/HTMLPlug.pm

    r1190 r1220  
    8181sub new {
    8282    my $class = shift (@_);
    83     my $self = new BasPlug ();
     83    my $self = new BasPlug (@_);
    8484
    8585    if (!parsargv::parse(\@_,
     
    9292             q^w3mir^, \$self->{'w3mir'},
    9393             q^assoc_files/.*/(?i)\.(jpe?g|gif|png|css|pdf)$^, \$self->{'assoc_files'},
    94              q^rename_assoc_files^, \$self->{'rename_assoc_files'})) {
     94             q^rename_assoc_files^, \$self->{'rename_assoc_files'},
     95             "allow_extra_options")) {
     96
    9597    &print_usage();
    9698    die "\n";
     
    132134    my $cursection = $doc_obj->get_top_section();
    133135   
    134     # read in HTML file
    135     open (FILE, $filename) || die "HTMLPlug::read - can't open $filename\n";
    136     undef $/;
    137     my $text = <FILE>;
    138     $/ = "\n";
    139     close FILE;
    140     if (!defined $text || $text !~ /\w/) {
     136    # read in HTML file ($text will be in utf8)
     137    my $text = "";
     138    $self->read_file ($filename, \$text);
     139
     140    if ($text !~ /\w/) {
    141141    print STDERR "HTMLPlug: ERROR: $file contains no text\n" if $self->{'verbosity'};
    142142    return 0;
     
    153153    my $web_url = "http://$file";
    154154    $web_url =~ s/\\/\//g; # for windows
    155     $doc_obj->add_metadata($cursection, "URL", $web_url);
     155    $doc_obj->add_utf8_metadata($cursection, "URL", $web_url);
    156156
    157157    # remove header and footer
     
    176176    $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge;
    177177
    178     $doc_obj->add_text ($cursection, $text);
     178    $doc_obj->add_utf8_text ($cursection, $text);
    179179
    180180    # add an OID
     
    362362            my $value = $1;
    363363            $value =~ s/\s+/ /gs;
    364             $doc_obj->add_metadata($section, $field, $value);
     364            $doc_obj->add_utf8_metadata($section, $field, $value);
    365365            next;
    366366        }
     
    379379            if ($title =~ /\w/) {
    380380            $title =~ s/\s+/ /gs;
    381             $doc_obj->add_metadata ($section, $field, $title);
     381            $doc_obj->add_utf8_metadata ($section, $field, $title);
    382382            next;
    383383            }
     
    390390        $tmptext =~ s/<[^>]*>//g;
    391391        my $title = substr ($tmptext, 0, 100);
    392         $doc_obj->add_metadata ($section, $field, $title);
     392        $doc_obj->add_utf8_metadata ($section, $field, $title);
    393393    }
    394394
     
    402402        $tmptext = substr ($tmptext, 0, 200);
    403403        $tmptext =~ s/\s\S*$/.../;
    404         $doc_obj->add_metadata ($section, $field, $tmptext);
     404        $doc_obj->add_utf8_metadata ($section, $field, $tmptext);
    405405    }
    406406    }
Note: See TracChangeset for help on using the changeset viewer.