Changeset 16024
- Timestamp:
- 2008-06-16 14:21:20 (5 months ago)
- Files:
-
- gsdl/trunk/perllib/plugins/HTMLPlugin.pm (modified) (23 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
gsdl/trunk/perllib/plugins/HTMLPlugin.pm
r15872 r16024 127 127 my $self = shift (@_); 128 128 my ($htmlfile, $text) = @_; 129 129 130 130 # load in the file 131 131 if (!open (FILE, $htmlfile)) { … … 152 152 my $self = shift (@_); 153 153 my ($foundbody, $text, $handle) = @_; 154 154 155 155 my $line = ""; 156 156 while (defined ($line = <$handle>)) { … … 174 174 } 175 175 176 if ($self->{'input_encoding'} eq "iso_8859_1") {176 if ($self->{'input_encoding'} eq "iso_8859_1") { 177 177 # convert to utf-8 178 178 $$text=&unicode::unicode2utf8(&unicode::convert2unicode("iso_8859_1", $text)); … … 221 221 return $section; 222 222 } 223 223 224 224 # Will convert the oldHDL format to the new HDL format (using the Section tag) 225 225 sub convert_to_newHDLformat 226 226 { 227 my $self = shift (@_);228 my ($file,$cnfile) = @_;229 my $input_filename = $file;230 my $tmp_filename = $cnfile;231 232 # write HTML tmp file with new HDL format233 open (PROD, ">$tmp_filename") || die("Error Writing to File: $tmp_filename $!");234 235 # read in the file and do basic html cleaning (removing header etc)236 my $html = "";237 $self->HB_read_html_file ($input_filename, \$html);238 227 my $self = shift (@_); 228 my ($file,$cnfile) = @_; 229 my $input_filename = $file; 230 my $tmp_filename = $cnfile; 231 232 # write HTML tmp file with new HDL format 233 open (PROD, ">$tmp_filename") || die("Error Writing to File: $tmp_filename $!"); 234 235 # read in the file and do basic html cleaning (removing header etc) 236 my $html = ""; 237 $self->HB_read_html_file ($input_filename, \$html); 238 239 239 # process the file one section at a time 240 240 my $curtoclevel = 1; … … 281 281 282 282 print PROD "<Section>\n\t<Description>\n\t\t<Metadata name=\"Title\">$title</Metadata>\n\t</Description>\n"; 283 283 284 284 print PROD "-->\n"; 285 285 286 286 # clean up the section html 287 287 $sectiontext = $self->HB_clean_section($sectiontext); … … 324 324 my ($file) = @_; 325 325 my $input_filename = $file; 326 326 327 327 if (-d $input_filename) 328 328 { … … 353 353 my $f_separator = &util::get_os_dirsep(); 354 354 355 if ($dirname =~ /import$f_separator/)355 if ($dirname =~ /import$f_separator/) 356 356 { 357 $test_dirname = $'; 357 $test_dirname = $'; #' 358 358 359 359 #print STDERR "init $'\n"; … … 361 361 while ($test_dirname =~ /[$f_separator]/) 362 362 { 363 my $folderdirname = $`;364 $tmp_dirname = &util::filename_cat($tmp_dirname,$folderdirname);365 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);366 $test_dirname = $';363 my $folderdirname = $`; 364 $tmp_dirname = &util::filename_cat($tmp_dirname,$folderdirname); 365 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname); 366 $test_dirname = $'; #' 367 367 } 368 368 } … … 377 377 if ($self->{'old_style_HDL'}) 378 378 { 379 $hdl_output_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");380 $hdl_output_filename = $self->convert_to_newHDLformat($input_filename,$hdl_output_filename);379 $hdl_output_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix"); 380 $hdl_output_filename = $self->convert_to_newHDLformat($input_filename,$hdl_output_filename); 381 381 } 382 382 … … 388 388 foreach my $file (@files) 389 389 { 390 my $src_file = &util::filename_cat($base_dirname,$file);391 my $dest_file = &util::filename_cat($tmp_dirname,$file);392 if ((!-e $dest_file) && (!-d $src_file))393 {394 # just copy the original file back to the tmp directory395 copy($src_file,$dest_file) or die "Can't copy file $src_file to $dest_file $!";396 }390 my $src_file = &util::filename_cat($base_dirname,$file); 391 my $dest_file = &util::filename_cat($tmp_dirname,$file); 392 if ((!-e $dest_file) && (!-d $src_file)) 393 { 394 # just copy the original file back to the tmp directory 395 copy($src_file,$dest_file) or die "Can't copy file $src_file to $dest_file $!"; 396 } 397 397 } 398 398 … … 401 401 if ($self->{'tidy_html'}) 402 402 { 403 $tidy_output_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");404 $tidy_output_filename = $self->tmp_tidy_file($hdl_output_filename,$tidy_output_filename);403 $tidy_output_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix"); 404 $tidy_output_filename = $self->tmp_tidy_file($hdl_output_filename,$tidy_output_filename); 405 405 } 406 406 $tmp_filename = $tidy_output_filename; … … 410 410 if (!-e $tmp_filename) 411 411 { 412 # just copy the original file back to the tmp directory413 copy($input_filename,$tmp_filename) or die "Can't copy file $input_filename to $tmp_filename $!";412 # just copy the original file back to the tmp directory 413 copy($input_filename,$tmp_filename) or die "Can't copy file $input_filename to $tmp_filename $!"; 414 414 } 415 415 } … … 418 418 } 419 419 420 420 421 421 # Will make the html input file as a proper XML file with removed font tag and 422 422 # image size added to the img tag. … … 429 429 my $input_filename = $file; 430 430 my $tmp_filename = $cnfile; 431 431 432 432 # get the input filename 433 433 my ($tailname, $dirname, $suffix) = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$"); 434 434 435 435 require HTML::TokeParser::Simple; 436 437 # create HTML parser to decode the input file438 my $parser = HTML::TokeParser::Simple->new($input_filename);439 440 # write HTML tmp file without the font tag and image size are added to the img tag441 open (PROD, ">$tmp_filename") || die("Error Writing to File: $tmp_filename $!");442 while (my $token = $parser->get_token())443 {444 # is it an img tag445 if ($token->is_start_tag('img'))446 {447 # get the attributes448 my $attr = $token->return_attr;449 450 # get the full path to the image451 my $img_file = &util::filename_cat($dirname,$attr->{src});436 437 # create HTML parser to decode the input file 438 my $parser = HTML::TokeParser::Simple->new($input_filename); 439 440 # write HTML tmp file without the font tag and image size are added to the img tag 441 open (PROD, ">$tmp_filename") || die("Error Writing to File: $tmp_filename $!"); 442 while (my $token = $parser->get_token()) 443 { 444 # is it an img tag 445 if ($token->is_start_tag('img')) 446 { 447 # get the attributes 448 my $attr = $token->return_attr; 449 450 # get the full path to the image 451 my $img_file = &util::filename_cat($dirname,$attr->{src}); 452 452 453 # set the width and height attribute454 ($attr->{width}, $attr->{height}) = imgsize($img_file);455 456 # recreate the tag457 print PROD "<img";458 print PROD map { qq { $_="$attr->{$_}"} } keys %$attr;459 print PROD ">";460 }461 # is it a font tag462 else463 {464 if (($token->is_start_tag('font')) || ($token->is_end_tag('font')))465 {466 # remove font tag467 print PROD "";468 }469 else470 {471 # print without changes472 print PROD $token->as_is;473 }474 }475 }476 close (PROD) || die("Error Closing File: $tmp_filename $!");477 478 # run html-tidy on the tmp file to make it a proper XML file479 my $tidyfile = `tidy -utf8 -wrap 0 -asxml $tmp_filename`;480 481 # write result back to the tmp file482 open (PROD, ">$tmp_filename") || die("Error Writing to File: $tmp_filename $!");483 print PROD $tidyfile;484 close (PROD) || die("Error Closing File: $tmp_filename $!");453 # set the width and height attribute 454 ($attr->{width}, $attr->{height}) = imgsize($img_file); 455 456 # recreate the tag 457 print PROD "<img"; 458 print PROD map { qq { $_="$attr->{$_}"} } keys %$attr; 459 print PROD ">"; 460 } 461 # is it a font tag 462 else 463 { 464 if (($token->is_start_tag('font')) || ($token->is_end_tag('font'))) 465 { 466 # remove font tag 467 print PROD ""; 468 } 469 else 470 { 471 # print without changes 472 print PROD $token->as_is; 473 } 474 } 475 } 476 close (PROD) || die("Error Closing File: $tmp_filename $!"); 477 478 # run html-tidy on the tmp file to make it a proper XML file 479 my $tidyfile = `tidy -utf8 -wrap 0 -asxml $tmp_filename`; 480 481 # write result back to the tmp file 482 open (PROD, ">$tmp_filename") || die("Error Writing to File: $tmp_filename $!"); 483 print PROD $tidyfile; 484 close (PROD) || die("Error Closing File: $tmp_filename $!"); 485 485 486 486 # return the output filename … … 535 535 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 536 536 push(@{$hashArgOptLists->{"OptList"}},$options); 537 537 538 538 539 539 my $self = new ReadTextFile($pluginlist,$inputargs,$hashArgOptLists); … … 609 609 } 610 610 611 611 612 612 # do plugin specific processing of doc_obj 613 613 sub process { … … 622 622 623 623 if ($ENV{'GSDLOS'} =~ /^windows/i) { 624 # this makes life so much easier... perl can cope with unix-style '/'s.625 $base_dir =~ s@(\\)+@/@g;626 $file =~ s@(\\)+@/@g;624 # this makes life so much easier... perl can cope with unix-style '/'s. 625 $base_dir =~ s@(\\)+@/@g; 626 $file =~ s@(\\)+@/@g; 627 627 } 628 628 … … 1026 1026 1027 1027 $filename = &util::filename_cat($base_dir, $filename); 1028 1028 1029 1029 # Replace %20's in URL with a space if required. Note that the filename 1030 1030 # may include the %20 in some situations … … 1092 1092 1093 1093 $before_hash = $self->eval_dir_dots($before_hash); 1094 1094 1095 1095 my $linkfilename = &util::filename_cat ($base_dir, $before_hash); 1096 1096 … … 1104 1104 1105 1105 return ($type . $before_hash, $hash_part, $rl); 1106 1106 1107 1107 } elsif ($link !~ /^(mailto|news|gopher|nntp|telnet|javascript):/i && $link !~ /^\//) { 1108 1108 if ($before_hash =~ s@^/@@ || $before_hash =~ /\\/) { … … 1129 1129 # $base_dir is already similarly "converted" on windows. 1130 1130 if ($win_before_hash =~ s@^$base_dir/@@o) { 1131 # if this is true, we removed a prefix1132 $before_hash=$win_before_hash;1131 # if this is true, we removed a prefix 1132 $before_hash=$win_before_hash; 1133 1133 } 1134 1134 } … … 1241 1241 # this assumes that ">" won't appear. (I don't think it's allowed to...) 1242 1242 $html_header =~ /^/; # match the start of the string, for \G assertion 1243 1243 1244 1244 while ($html_header =~ m/\G.*?<meta(.*?)>/sig) { 1245 1245 my $metatag=$1; … … 1284 1284 $tag='Creator'; 1285 1285 } elsif (!exists $find_fields{lc($tag)}) { 1286 next; # don't want this tag1286 next; # don't want this tag 1287 1287 } else { 1288 1288 # get the user's preferred capitalisation
