Changeset 33301
- Timestamp:
- 2019-07-05T20:12:10+12:00 (5 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugins/StructuredHTMLPlugin.pm
r33299 r33301 105 105 my @head_and_body = split(/<body/i,$$textref); 106 106 my $head = shift(@head_and_body); 107 my $body_text = join("<body", @head_and_body); 107 my $body_text = join("<body", @head_and_body); # won't actually work to prefix "<body" to just the body remaining in @head_and_body array, since only 1 element (the body) remains in @head_and_body 108 108 $head =~ m/<title>(.+)<\/title>/i; 109 109 my $doctitle = $1 if defined $1; … … 158 158 $body_text =~ s/(<p[^>]*><span[^>]*><o:p> <\/o:p><\/span><\/p>)//isg; 159 159 $body_text =~ s/(<p[^>]*><o:p> <\/o:p><\/p>)//isg; 160 161 160 161 # what was the following line for. effectively unused. do we need it?? 162 162 #$section_text .= "<!--\n<Section>\n-->\n"; 163 163 #my $top_section_tag = "<!--\n<Section>\n-->\n"; … … 165 165 #$body_text =~ s/(<div.*)/$top_section_tag$1/i; 166 166 my $body = "<body".$body_text; 167 167 168 168 my $section_text = $head; 169 169 … … 175 175 my $sectionh1 = 0; 176 176 $section_text .= shift(@h_split); 177 177 178 # When windows_scripting is on, WordPlugin invokes Word to convert the doc(x) file to HTML which is then 179 # processed by this StructuredHTMLPlugin. However, Word will embed the entire HTML body content inside a <div> 180 # This <div> becomes problematic, since in sectioned documents, the first section would end up starting with a div 181 # but not contain a matching closing div, while the final section will end with an unmatched closing div. 182 # So, as a hack, we remove any opening <div> appearing immediately after the <body> before the first <h>eading. 183 # And we'll set a flag to remember to remove any corresponding closing </div> before the closing </body>. 
184 # So, now we look for any unclosed <div> elements in the preamble (pre-Headings) html that is in $section_text 185 my $remove_global_div = 0; 186 if($section_text =~ m/^(.*?)\s*<div[^>]*>\s*$/is) { 187 $section_text = $1; 188 $remove_global_div = 1; 189 print $outhandle "********** Found and removed a global opening <div> at start of html body, will monitor for closing div too.\n" 190 if $self->{'verbosity'} > 2; 191 } 192 178 193 my $hc; 179 194 foreach $hc ( @h_split ) … … 260 275 } 261 276 } 262 263 while ($hnum >= 1) 277 278 if($remove_global_div) { # then need to also handle a closing </div> tag for the global div too, and if one is present, remove it 279 $section_text =~ s@\s*</div[^>]*>(\s*</body>\s*</html>\s*)$@$1@is; 280 print $outhandle "********** Removing any matching closing global divider element\n" 281 if $self->{'verbosity'} > 2; 282 } 283 284 285 while ($hnum >= 1) 264 286 { 265 287 my $spacing = " " x $hnum; … … 271 293 272 294 $section_text .= "<!--\n</Section>\n-->\n"; 273 295 274 296 $$textref = $section_text; 275 297
Note: See TracChangeset for help on using the changeset viewer.