Changeset 1220
- Timestamp:
- 2000-06-21T10:16:40+12:00 (24 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/HTMLPlug.pm
r1190  r1220
   81     81     sub new {
   82     82     my $class = shift (@_);
   83          - my $self = new BasPlug ();
          83   + my $self = new BasPlug (@_);
   84     84
   85     85     if (!parsargv::parse(\@_,
    …      …
   92     92     q^w3mir^, \$self->{'w3mir'},
   93     93     q^assoc_files/.*/(?i)\.(jpe?g|gif|png|css|pdf)$^, \$self->{'assoc_files'},
   94          - q^rename_assoc_files^, \$self->{'rename_assoc_files'})) {
          94   + q^rename_assoc_files^, \$self->{'rename_assoc_files'},
          95   + "allow_extra_options")) {
          96   +
   95     97     &print_usage();
   96     98     die "\n";
    …      …
  132    134     my $cursection = $doc_obj->get_top_section();
  133    135
  134          - # read in HTML file
  135          - open (FILE, $filename) || die "HTMLPlug::read - can't open $filename\n";
  136          - undef $/;
  137          - my $text = <FILE>;
  138          - $/ = "\n";
  139          - close FILE;
  140          - if (!defined $text || $text !~ /\w/) {
         136   + # read in HTML file ($text will be in utf8)
         137   + my $text = "";
         138   + $self->read_file ($filename, \$text);
         139   +
         140   + if ($text !~ /\w/) {
  141    141     print STDERR "HTMLPlug: ERROR: $file contains no text\n" if $self->{'verbosity'};
  142    142     return 0;
    …      …
  153    153     my $web_url = "http://$file";
  154    154     $web_url =~ s/\\/\//g; # for windows
  155          - $doc_obj->add_metadata($cursection, "URL", $web_url);
         155   + $doc_obj->add_utf8_metadata($cursection, "URL", $web_url);
  156    156
  157    157     # remove header and footer
    …      …
  176    176     $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge;
  177    177
  178          - $doc_obj->add_text ($cursection, $text);
         178   + $doc_obj->add_utf8_text ($cursection, $text);
  179    179
  180    180     # add an OID
    …      …
  362    362     my $value = $1;
  363    363     $value =~ s/\s+/ /gs;
  364          - $doc_obj->add_metadata($section, $field, $value);
         364   + $doc_obj->add_utf8_metadata($section, $field, $value);
  365    365     next;
  366    366     }
    …      …
  379    379     if ($title =~ /\w/) {
  380    380     $title =~ s/\s+/ /gs;
  381          - $doc_obj->add_metadata ($section, $field, $title);
         381   + $doc_obj->add_utf8_metadata ($section, $field, $title);
  382    382     next;
  383    383     }
    …      …
  390    390     $tmptext =~ s/<[^>]*>//g;
  391    391     my $title = substr ($tmptext, 0, 100);
  392          - $doc_obj->add_metadata ($section, $field, $title);
         392   + $doc_obj->add_utf8_metadata ($section, $field, $title);
  393    393     }
  394    394
    …      …
  402    402     $tmptext = substr ($tmptext, 0, 200);
  403    403     $tmptext =~ s/\s\S*$/.../;
  404          - $doc_obj->add_metadata ($section, $field, $tmptext);
         404   + $doc_obj->add_utf8_metadata ($section, $field, $tmptext);
  405    405     }
  406    406     }
Note: See TracChangeset for help on using the changeset viewer.