Changeset 11893
- Timestamp: 2006-05-31T10:33:25+12:00 (18 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/StructuredHTMLPlug.pm
r11884 r11893 96 96 } 97 97 98 sub read {99 my $self = shift (@_);100 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;101 102 my $filename = $file;103 $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;104 105 if ($filename =~ m/\.html?$/) {106 my $poss_doc_filename = $filename;107 $poss_doc_filename =~ s/\.html?$/.doc/;108 109 if (-e $poss_doc_filename) {110 # this file has already been processed by Word plugin111 return 0;112 }113 }114 return $self->SUPER::read(@_);115 }116 98 117 99 sub process { 118 100 my $self = shift (@_); 119 #my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;120 101 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 121 102 my $outhandle = $self->{'outhandle'}; … … 139 120 } 140 121 141 # If delete_toc is enables, it means to get rid of toc and tof contents. 122 # set the title here if we haven't found it yet 123 if (!defined $doc_obj->get_metadata_element ($doc_obj->get_top_section(), "Title")) { 124 if (defined $doctitle && $doctitle =~ /\S/) { 125 $doc_obj->add_metadata($doc_obj->get_top_section(), "Title", $doctitle); 126 } else { 127 $self->title_fallback($doc_obj,$doc_obj->get_top_section(),$file); 128 } 129 } 130 131 # If delete_toc is enabled, it means to get rid of toc and tof contents. 
142 132 # get rid of TOC and TOF sections and their title 143 133 if (defined $self->{'delete_toc'} && ($self->{'delete_toc'} == 1)){ … … 149 139 if (defined $self->{'title_header'} && $self->{'title_header'}=~ /\S/){ 150 140 $self->{'title_header'} =~ s/^(\()(.*)(\))/$2/is; 151 #$body_text =~ s/<p class=(($self->{'title_header'})[^>]*)>(.+?)<\/p>/<p class=$1><title>$3<\/title><\/p>/isg;152 #$doctitle = $3;153 141 $body_text =~ s/<p class=(($self->{'title_header'})[^>]*)>(.+?)<\/p>/<p class=$1><h1>$3<\/h1><\/p>/isg; 154 #$body_text =~ m/<p class=(($self->{'title_header'})[^>]*)>(.+?)<\/p>/isg;155 #$doctitle = "<h1>".$3."<\/h1>" if defined $3;156 142 } 157 143 … … 170 156 $body_text =~ s/<p class=(($self->{'level3_header'})[^>]*)>(.+?)<\/p>/<p class=$1><h3>$3<\/h3><\/p>/isg; 171 157 } 158 172 159 # Tidy up extra new lines 173 160 $body_text =~ s/(<p[^>]*><span[^>]*><o:p> <\/o:p><\/span><\/p>)//isg; … … 246 233 $section_text .= "-->\n"; 247 234 248 print $outhandle $spacing."$h_text\n"249 if $self->{'verbosity'} > 2;235 #print $outhandle $spacing."$h_text\n" 236 # if $self->{'verbosity'} > 2; 250 237 251 238 $sectionh1++ if ($hnum==1); … … 276 263 $$textref = $section_text; 277 264 278 # should be textref not testref??? 279 #$$testref =~ s/<h(\d+)>(.*?)<\/h$1>/<Section><Metadata name=\"Title\">$1<\/Metadata></Section><h$1><\/h$1>/gi; 280 281 if ($sectionh1>0) 282 { 283 print $outhandle " Located section headings ..." 284 if $self->{'verbosity'} > 1; 285 } 286 print $outhandle " Passing on the HTMLPlug\n" 287 if $self->{'verbosity'} > 1; 265 # if ($sectionh1>0) 266 # { 267 # print $outhandle " Located section headings ..." 
268 # if $self->{'verbosity'} > 1; 269 # } 288 270 289 271 $$textref =~ s/<!\[if !vml\]>/<![if vml]>/g; … … 295 277 $self->SUPER::process(@_); 296 278 297 # associate original file with doc object298 my $cursection = $doc_obj->get_top_section();299 my $filename = &util::filename_cat($base_dir, $file);300 if (-e $filename)301 {302 print $outhandle " Adding associated Word document\n"303 if $self->{'verbosity'} > 1;304 305 $doc_obj->associate_file($filename, "doc.doc", undef, $cursection);306 307 my $doclink = "<a href=_httpprefix_/collect/[collection]/index/assoc/[archivedir]/doc.doc>";308 $doc_obj->add_utf8_metadata ($cursection, "srclink", $doclink);309 $doc_obj->add_utf8_metadata ($cursection, "srcicon", "_icondoc_");310 $doc_obj->add_utf8_metadata ($cursection, "/srclink", "</a>");311 $doc_obj->add_utf8_metadata ($cursection, "Title", $doctitle);312 my $file_size = -s $filename;313 if ($file_size>1024)314 {315 my $fs_kbytes = sprintf("%d",$file_size/1024);316 $doc_obj->add_utf8_metadata ($cursection, "filesize", "$fs_kbytes Kb");317 }318 else319 {320 $doc_obj->add_utf8_metadata ($cursection, "filesize", "$file_size bytes");321 }322 323 if ($file_size > 200000)324 {325 $doc_obj->add_utf8_metadata ($cursection, "fswarning", "1");326 }327 }328 279 } 329 280 … … 363 314 364 315 if (($img_width < $actual_width) || ($img_height < $actual_height)) { 365 print $outhandle "Resizing $img_filename\n" if ($verbosity > 0);316 #print $outhandle "Resizing $img_filename\n" if ($verbosity > 0); 366 317 367 318 # derive new image name based on current image … … 378 329 my $command = "convert -interlace plane -verbose " 379 330 ."-geometry $newsize \"img_$filename\" \"$resized_filename\""; 380 print $outhandle "ImageResize: $command\n" if ($verbosity > 2);381 my $result = '';382 print $outhandle "ImageResize result: $result\n" if ($verbosity > 2);331 #print $outhandle "ImageResize: $command\n" if ($verbosity > 2); 332 #my $result = ''; 333 #print $outhandle "ImageResize result: 
$result\n" if ($verbosity > 2); 383 334 } 384 335 } … … 438 389 $value = $1; 439 390 if (!defined $value || !defined $tag){ 440 print $outhandle "StructuredHTMLPlug: can't find VALUE in \"$tag\"\n";391 #print $outhandle "StructuredHTMLPlug: can't find VALUE in \"$tag\"\n"; 441 392 next; 442 393 } else { … … 444 395 chomp($value); # remove trailing \n, if any 445 396 $tag = $find_fields{lc($tag)}; 446 print $outhandle " extracted \"$tag\" metadata \"$value\"\n"447 if ($self->{'verbosity'} > 2);397 #print $outhandle " extracted \"$tag\" metadata \"$value\"\n" 398 # if ($self->{'verbosity'} > 2); 448 399 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), $tag, $value); 449 400 }
Note: See TracChangeset for help on using the changeset viewer.