Changeset 16024
- Timestamp:
- 2008-06-16T14:21:20+12:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/HTMLPlugin.pm
r15872 r16024 127 127 my $self = shift (@_); 128 128 my ($htmlfile, $text) = @_; 129 129 130 130 # load in the file 131 131 if (!open (FILE, $htmlfile)) { … … 152 152 my $self = shift (@_); 153 153 my ($foundbody, $text, $handle) = @_; 154 154 155 155 my $line = ""; 156 156 while (defined ($line = <$handle>)) { … … 174 174 } 175 175 176 if ($self->{'input_encoding'} eq "iso_8859_1") {176 if ($self->{'input_encoding'} eq "iso_8859_1") { 177 177 # convert to utf-8 178 178 $$text=&unicode::unicode2utf8(&unicode::convert2unicode("iso_8859_1", $text)); … … 221 221 return $section; 222 222 } 223 223 224 224 # Will convert the oldHDL format to the new HDL format (using the Section tag) 225 225 sub convert_to_newHDLformat 226 226 { 227 my $self = shift (@_);228 my ($file,$cnfile) = @_;229 my $input_filename = $file;230 my $tmp_filename = $cnfile;231 232 # write HTML tmp file with new HDL format233 open (PROD, ">$tmp_filename") || die("Error Writing to File: $tmp_filename $!");234 235 # read in the file and do basic html cleaning (removing header etc)236 my $html = "";237 $self->HB_read_html_file ($input_filename, \$html);238 227 my $self = shift (@_); 228 my ($file,$cnfile) = @_; 229 my $input_filename = $file; 230 my $tmp_filename = $cnfile; 231 232 # write HTML tmp file with new HDL format 233 open (PROD, ">$tmp_filename") || die("Error Writing to File: $tmp_filename $!"); 234 235 # read in the file and do basic html cleaning (removing header etc) 236 my $html = ""; 237 $self->HB_read_html_file ($input_filename, \$html); 238 239 239 # process the file one section at a time 240 240 my $curtoclevel = 1; … … 281 281 282 282 print PROD "<Section>\n\t<Description>\n\t\t<Metadata name=\"Title\">$title</Metadata>\n\t</Description>\n"; 283 283 284 284 print PROD "-->\n"; 285 285 286 286 # clean up the section html 287 287 $sectiontext = $self->HB_clean_section($sectiontext); … … 324 324 my ($file) = @_; 325 325 my $input_filename = $file; 326 326 327 327 if (-d $input_filename) 328 328 { … … 353 353 my $f_separator = &util::get_os_dirsep(); 354 354 355 if ($dirname =~ /import$f_separator/)355 if ($dirname =~ /import$f_separator/) 356 356 { 357 $test_dirname = $'; 357 $test_dirname = $'; #' 358 358 359 359 #print STDERR "init $'\n"; … … 361 361 while ($test_dirname =~ /[$f_separator]/) 362 362 { 363 364 365 366 $test_dirname = $';363 my $folderdirname = $`; 364 $tmp_dirname = &util::filename_cat($tmp_dirname,$folderdirname); 365 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname); 366 $test_dirname = $'; #' 367 367 } 368 368 } … … 377 377 if ($self->{'old_style_HDL'}) 378 378 { 379 380 379 $hdl_output_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix"); 380 $hdl_output_filename = $self->convert_to_newHDLformat($input_filename,$hdl_output_filename); 381 381 } 382 382 … … 388 388 foreach my $file (@files) 389 389 { 390 391 392 393 394 395 396 390 my $src_file = &util::filename_cat($base_dirname,$file); 391 my $dest_file = &util::filename_cat($tmp_dirname,$file); 392 if ((!-e $dest_file) && (!-d $src_file)) 393 { 394 # just copy the original file back to the tmp directory 395 copy($src_file,$dest_file) or die "Can't copy file $src_file to $dest_file $!"; 396 } 397 397 } 398 398 … … 401 401 if ($self->{'tidy_html'}) 402 402 { 403 404 403 $tidy_output_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix"); 404 $tidy_output_filename = $self->tmp_tidy_file($hdl_output_filename,$tidy_output_filename); 405 405 } 406 406 $tmp_filename = $tidy_output_filename; … … 410 410 if (!-e $tmp_filename) 411 411 { 412 413 412 # just copy the original file back to the tmp directory 413 copy($input_filename,$tmp_filename) or die "Can't copy file $input_filename to $tmp_filename $!"; 414 414 } 415 415 } … … 418 418 } 419 419 420 420 421 421 # Will make the html input file as a proper XML file with removed font tag and 422 422 # image size added to the img tag. … … 429 429 my $input_filename = $file; 430 430 my $tmp_filename = $cnfile; 431 431 432 432 # get the input filename 433 433 my ($tailname, $dirname, $suffix) = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$"); 434 434 435 435 require HTML::TokeParser::Simple; 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 436 437 # create HTML parser to decode the input file 438 my $parser = HTML::TokeParser::Simple->new($input_filename); 439 440 # write HTML tmp file without the font tag and image size are added to the img tag 441 open (PROD, ">$tmp_filename") || die("Error Writing to File: $tmp_filename $!"); 442 while (my $token = $parser->get_token()) 443 { 444 # is it an img tag 445 if ($token->is_start_tag('img')) 446 { 447 # get the attributes 448 my $attr = $token->return_attr; 449 450 # get the full path to the image 451 my $img_file = &util::filename_cat($dirname,$attr->{src}); 452 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 453 # set the width and height attribute 454 ($attr->{width}, $attr->{height}) = imgsize($img_file); 455 456 # recreate the tag 457 print PROD "<img"; 458 print PROD map { qq { $_="$attr->{$_}"} } keys %$attr; 459 print PROD ">"; 460 } 461 # is it a font tag 462 else 463 { 464 if (($token->is_start_tag('font')) || ($token->is_end_tag('font'))) 465 { 466 # remove font tag 467 print PROD ""; 468 } 469 else 470 { 471 # print without changes 472 print PROD $token->as_is; 473 } 474 } 475 } 476 close (PROD) || die("Error Closing File: $tmp_filename $!"); 477 478 # run html-tidy on the tmp file to make it a proper XML file 479 my $tidyfile = `tidy -utf8 -wrap 0 -asxml $tmp_filename`; 480 481 # write result back to the tmp file 482 open (PROD, ">$tmp_filename") || die("Error Writing to File: $tmp_filename $!"); 483 print PROD $tidyfile; 484 close (PROD) || die("Error Closing File: $tmp_filename $!"); 485 485 486 486 # return the output filename … … 535 535 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 536 536 push(@{$hashArgOptLists->{"OptList"}},$options); 537 537 538 538 539 539 my $self = new ReadTextFile($pluginlist,$inputargs,$hashArgOptLists); … … 609 609 } 610 610 611 611 612 612 # do plugin specific processing of doc_obj 613 613 sub process { … … 622 622 623 623 if ($ENV{'GSDLOS'} =~ /^windows/i) { 624 625 626 624 # this makes life so much easier... perl can cope with unix-style '/'s. 625 $base_dir =~ s@(\\)+@/@g; 626 $file =~ s@(\\)+@/@g; 627 627 } 628 628 … … 1026 1026 1027 1027 $filename = &util::filename_cat($base_dir, $filename); 1028 1028 1029 1029 # Replace %20's in URL with a space if required. Note that the filename 1030 1030 # may include the %20 in some situations … … 1092 1092 1093 1093 $before_hash = $self->eval_dir_dots($before_hash); 1094 1094 1095 1095 my $linkfilename = &util::filename_cat ($base_dir, $before_hash); 1096 1096 … … 1104 1104 1105 1105 return ($type . $before_hash, $hash_part, $rl); 1106 1106 1107 1107 } elsif ($link !~ /^(mailto|news|gopher|nntp|telnet|javascript):/i && $link !~ /^\//) { 1108 1108 if ($before_hash =~ s@^/@@ || $before_hash =~ /\\/) { … … 1129 1129 # $base_dir is already similarly "converted" on windows. 1130 1130 if ($win_before_hash =~ s@^$base_dir/@@o) { 1131 1132 1131 # if this is true, we removed a prefix 1132 $before_hash=$win_before_hash; 1133 1133 } 1134 1134 } … … 1241 1241 # this assumes that ">" won't appear. (I don't think it's allowed to...) 1242 1242 $html_header =~ /^/; # match the start of the string, for \G assertion 1243 1243 1244 1244 while ($html_header =~ m/\G.*?<meta(.*?)>/sig) { 1245 1245 my $metatag=$1; … … 1284 1284 $tag='Creator'; 1285 1285 } elsif (!exists $find_fields{lc($tag)}) { 1286 1286 next; # don't want this tag 1287 1287 } else { 1288 1288 # get the user's preferred capitalisation
Note:
See TracChangeset
for help on using the changeset viewer.