Changeset 33299 for main/trunk
- Timestamp: 2019-07-04T19:10:03+12:00 (5 years ago)
- Location: main/trunk/greenstone2/perllib
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugins/StructuredHTMLPlugin.pm
r32332 r33299 159 159 $body_text =~ s/(<p[^>]*><o:p> <\/o:p><\/p>)//isg; 160 160 161 161 # what was the following line for. effectively unused. do we need it?? 162 162 #$section_text .= "<!--\n<Section>\n-->\n"; 163 163 #my $top_section_tag = "<!--\n<Section>\n-->\n"; … … 179 179 foreach $hc ( @h_split ) 180 180 { 181 if ($hc =~ m/^([1-3])\s*.*?>(.*)$/s) 182 { 183 my $new_hnum = $1; 184 my $hc_after = $2; 185 186 if ($hc_after =~ m/^(.*?)<\/h$new_hnum>/is) 187 { 188 my $h_text = $1; 189 $hc =~ s/^(\ \;)+/\ \;/g; 190 # boil HTML down to some interesting text 191 $h_text =~ s/^[1-3]>//; 192 $h_text =~ s/<\/?.*?>//sg; 193 $h_text =~ s/\s+/ /sg; 194 $h_text =~ s/^\s$//s; 195 $h_text =~ s/( )+\W*/ /sg; 196 197 if ($h_text =~ m/\w+/) 181 if ($hc =~ m/^([1-3])\s*.*?>(.*)$/s) 198 182 { 199 if ($new_hnum > $hnum)200 {201 # increase section nesting202 $hnum++;203 while ($hnum < $new_hnum)183 my $new_hnum = $1; 184 my $hc_after = $2; 185 my $filtered_hc = undef; 186 187 if ($hc_after =~ m/^(.*?)<\/h$new_hnum>(.*)$/is) 204 188 { 205 my $spacing = " " x $hnum; 206 $section_text .= "<!--\n"; 207 $section_text .= $spacing."<Section>\n"; 208 $section_text .= "-->\n"; 209 $hnum++; 189 my $h_text = $1; 190 $filtered_hc = $2; # This represents the remainder of $hc, after the (e.g.) 
<h2>xxxx</h2> element 191 192 $hc =~ s/^(\ \;)+/\ \;/g; 193 # boil HTML down to some interesting text 194 $h_text =~ s/^[1-3]>//; 195 $h_text =~ s/<\/?.*?>//sg; 196 $h_text =~ s/\s+/ /sg; 197 $h_text =~ s/^\s$//s; 198 $h_text =~ s/( )+\W*/ /sg; 199 200 if ($h_text =~ m/\w+/) 201 { 202 if ($new_hnum > $hnum) 203 { 204 # increase section nesting 205 $hnum++; 206 while ($hnum < $new_hnum) 207 { 208 my $spacing = " " x $hnum; 209 $section_text .= "<!--\n"; 210 $section_text .= $spacing."<Section>\n"; 211 $section_text .= "-->\n"; 212 $hnum++; 213 } 214 } 215 else # ($new_hnum <= $hnum) 216 { 217 # descrease section nesting 218 while ($hnum >= $new_hnum) 219 { 220 my $spacing = " " x $hnum; 221 $section_text .= "<!--\n"; 222 $section_text .= $spacing."</Section>\n"; 223 $section_text .= "-->\n"; 224 $hnum--; 225 } 226 $hnum++; 227 } 228 229 my $spacing = " " x $hnum; 230 $section_text .= "<!--\n"; 231 $section_text .= $spacing."<Section>\n"; 232 $section_text .= $spacing." <Description>\n"; 233 $section_text .= $spacing." <Metadata name=\"Title\">$h_text</Metadata>"; 234 $section_text .= $spacing." </Description>\n"; 235 $section_text .= "-->\n"; 236 237 #print $outhandle $spacing."$h_text\n" 238 # if $self->{'verbosity'} > 2; 239 240 $sectionh1++ if ($hnum==1); 241 } 210 242 } 211 } 212 else # ($new_hnum <= $hnum) 213 { 214 # descrease section nesting 215 while ($hnum >= $new_hnum) 216 { 217 my $spacing = " " x $hnum; 218 $section_text .= "<!--\n"; 219 $section_text .= $spacing."</Section>\n"; 220 $section_text .= "-->\n"; 221 $hnum--; 243 else { 244 ### print STDERR "***** hc = <h$hc\n\n"; 222 245 } 223 $hnum++; 224 } 225 226 my $spacing = " " x $hnum; 227 $section_text .= "<!--\n"; 228 $section_text .= $spacing."<Section>\n"; 229 $section_text .= $spacing." <Description>\n"; 230 $section_text .= $spacing." <Metadata name=\"Title\">$h_text</Metadata>"; 231 $section_text .= $spacing." 
</Description>\n"; 232 $section_text .= "-->\n"; 233 234 #print $outhandle $spacing."$h_text\n" 235 # if $self->{'verbosity'} > 2; 236 237 $sectionh1++ if ($hnum==1); 246 247 # This can probably be replaced by the first statement in the if-statement, because 248 # $filtered_hc should always be defined when it is assigned above (even if it resolves 249 # to be an empty string) 250 if (defined $filtered_hc) { 251 $section_text .= $filtered_hc; 252 } 253 else { 254 $section_text .= "<h$hc"; 255 } 238 256 } 239 } 240 else { 241 ### print STDERR "***** hc = <h$hc\n\n"; 242 } 243 $section_text .= "<h$hc"; 244 } 245 else 246 { 247 $section_text .= "<h$hc"; 248 } 257 else 258 { 259 $section_text .= "<h$hc"; 260 } 249 261 } 250 262 -
main/trunk/greenstone2/perllib/unicode.pm
r28796 r33299 765 765 } 766 766 767 # perl version of stringToHex 767 768 # Useful method to print UTF8 (or other unicode) for debugging. 768 769 # Characters that are easily displayed (that is, printable ASCII)
Note: See TracChangeset for help on using the changeset viewer.