Changeset 10600
- Timestamp:
- 2005-09-07T09:18:27+12:00 (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/StructuredHTMLPlug.pm
r10595 r10600 92 92 print $outhandle "StructuredHTMLPlug: processing $file\n" 93 93 if $self->{'verbosity'} > 1; 94 94 95 my @head_and_body = split(/<body/i,$$textref); 95 96 my $head = shift(@head_and_body); 96 97 my $body_text = join("<body", @head_and_body); 97 98 $head =~ m/<title>(.+)<\/title>/i; 99 my $doctitle = $1 if defined $1; 98 100 if (defined $self->{'extracted_word_metadata_fields'} && $self->{'extracted_word_metadata_fields'}=~ /\S/) { 99 101 my @doc_properties = split(/<xml>/i,$head); … … 108 110 # get rid of TOC and TOF sections and their title 109 111 if ($self->{'delete_toc'} == 1){ 110 #line-height:150%;mso-ansi-language:FR'>Contents<o:p></o:p></span></b></p>111 # get rid of Table of Contents title and Table of Figures112 # these two lines don't work - how can we do this properlly??113 112 if (defined $self->{'toc_header'}&& $self->{'toc_header'} =~ /\S/){ 114 113 $body_text =~ s/<p class=(($self->{'toc_header'})[^>]*)>(.+?)<\/p>//isg; … … 118 117 } 119 118 } 120 119 121 120 if (defined $self->{'title_header'} && $self->{'title_header'}=~ /\S/){ 122 121 $self->{'title_header'} =~ s/^(\()(.*)(\))/$2/is; 123 $body_text =~ s/<p class=(($self->{'title_header'})[^>]*)>(.+?)<\/p>/<p class=$1><title>$3<\/title><\/p>/isg; 122 #$body_text =~ s/<p class=(($self->{'title_header'})[^>]*)>(.+?)<\/p>/<p class=$1><title>$3<\/title><\/p>/isg; 123 #$doctitle = $3; 124 $body_text =~ s/<p class=(($self->{'title_header'})[^>]*)>(.+?)<\/p>/<p class=$1><h1>$3<\/h1><\/p>/isg; 125 #$body_text =~ m/<p class=(($self->{'title_header'})[^>]*)>(.+?)<\/p>/isg; 126 #$doctitle = "<h1>".$3."<\/h1>" if defined $3; 124 127 } 125 128 … … 142 145 $body_text =~ s/(<p[^>]*><o:p> <\/o:p><\/p>)//isg; 143 146 147 $section_text .= "<!--\n<Section>\n-->\n"; 148 #my $top_section_tag = "<!--\n<Section>\n-->\n"; 149 #$body_text =~ s/(<div.*)/$top_section_text$doctitle$1/i; 150 #$body_text =~ s/(<div.*)/$top_section_tag$1/i; 144 151 my $body = "<body".$body_text; 145 152 146 153 my $section_text = $head; 147 $section_text .= "<!--\n<Section>\n-->\n";148 154 149 155 # split HTML text on <h1>, <h2> etc tags … … 203 209 } 204 210 205 my $spacing = " " x $hnum; 211 my $spacing = " " x $hnum; 206 212 $section_text .= "<!--\n"; 207 213 $section_text .= $spacing."<Section>\n"; … … 219 225 else { 220 226 ### print STDERR "***** hc = <h$hc\n\n"; 221 222 227 } 223 228 $section_text .= "<h$hc"; … … 275 280 $doc_obj->add_utf8_metadata ($cursection, "srcicon", "_icondoc_"); 276 281 $doc_obj->add_utf8_metadata ($cursection, "/srclink", "</a>"); 277 278 my $file_size = -s $filename;282 $doc_obj->add_utf8_metadata ($cursection, "Title", $doctitle); 283 my $file_size = -s $filename; 279 284 if ($file_size>1024) 280 285 {
Note:
See TracChangeset
for help on using the changeset viewer.