Changeset 2817
- Timestamp:
- 2001-11-05T16:30:27+13:00 (22 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/HTMLPlug.pm
r2735 r2817 75 75 print STDERR " Used by, for example, PDFHtml to remove Page 1 etc from text\n"; 76 76 print STDERR " chosen to be used as the title.\n"; 77 print STDERR " -description_tags Split document into sub-sections where <Section> tags occur.\n"; 78 print STDERR " Note that by setting this option you implicitly set -no_metadata\n"; 79 print STDERR " as all metadata should be included within the <Section> tags.\n"; 80 print STDERR " Also, -keep_head will have no effect when this option is set.\n"; 77 81 } 78 82 … … 92 96 q^rename_assoc_files^, \$self->{'rename_assoc_files'}, 93 97 q^title_sub/.*/^, \$self->{'title_sub'}, 98 q^description_tags^, \$self->{'description_tags'}, 94 99 "allow_extra_options")) { 95 100 … … 139 144 140 145 $self->extract_metadata ($textref, $metadata, $doc_obj, $cursection) 141 unless $self->{'no_metadata'} ;146 unless $self->{'no_metadata'} || $self->{'description_tags'}; 142 147 143 148 # Store URL for page as metadata - this can be used for an … … 149 154 $doc_obj->add_utf8_metadata($cursection, "URL", $web_url); 150 155 156 if ($self->{'description_tags'}) { 157 158 my $found_something = 0; my $top = 1; 159 while ($$textref =~ s/^(.*?)<!--(.*?)-->//s) { 160 my $text = $1; 161 my $comment = $2; 162 if (defined $text) { 163 $self->process_section(\$text, $base_dir, $file, $doc_obj, $cursection); 164 } 165 while ($comment =~ s/<([^>]+)>//s) { 166 my $tag = $1; 167 if ($tag eq "Section") { 168 $found_something = 1; 169 $cursection = $doc_obj->insert_section($doc_obj->get_end_child($cursection)) unless $top; 170 $top = 0; 171 } elsif ($tag eq "/Section") { 172 $found_something = 1; 173 $cursection = $doc_obj->get_parent_section ($cursection); 174 } elsif ($tag =~ /^Metadata name=\"([^\"]+)\"/s) { 175 my $metaname = $1; 176 $comment =~ s/^(.*?)<\/Metadata>//s; 177 my $metavalue = $1; 178 $metavalue =~ s/^\s+//; 179 $metavalue =~ s/\s+$//; 180 $doc_obj->set_utf8_metadata_element($cursection, $metaname, $metavalue); 181 } 182 } 183 } 184 if ($cursection ne "") { 185 print $outhandle "HTMLPlug: WARNING: $file contains unmatched <Section></Section> tags\n"; 186 } 187 188 $$textref =~ s/^.*?<body[^>]*>//is; 189 $$textref =~ s/(<\/body[^>]*>|<\/html[^>]*>)//isg; 190 if ($$textref =~ /\S/) { 191 if (!$found_something) { 192 print $outhandle "HTMLPlug: WARNING: $file appears to contain no Section tags so\n"; 193 print $outhandle " will be processed as a single section document\n"; 194 $self->process_section($$textref, $base_dir, $file, $doc_obj, $cursection); 195 } else { 196 print $outhandle "HTMLPlug: WARNING: $file contains the following text outside\n"; 197 print $outhandle " of the final closing </Section> tag. This text will\n"; 198 print $outhandle " be ignored."; 199 if (length($$textref) > 30) { 200 $text = substr($$textref, 0, 30) . "..."; 201 } 202 $text =~ s/\n/ /isg; 203 print $outhandle " ($text)\n"; 204 } 205 } 206 207 } else { 208 # single section document 209 $self->process_section($textref, $base_dir, $file, $doc_obj, $cursection); 210 } 211 return 1; 212 } 213 214 # note that process_section may be called multiple times for a single 215 # section (relying on the fact that add_utf8_text appends the text to any 216 # that may exist already). 217 sub process_section { 218 my $self = shift (@_); 219 my ($textref, $base_dir, $file, $doc_obj, $cursection) = @_; 220 151 221 # remove header and footer 152 if (!$self->{'keep_head'} ) {222 if (!$self->{'keep_head'} || $self->{'description_tags'}) { 153 223 $$textref =~ s/^.*?<body[^>]*>//is; 154 224 $$textref =~ s/(<\/body[^>]*>|<\/html[^>]*>)//isg; … … 174 244 # add text to document object 175 245 $doc_obj->add_utf8_text($cursection, $$textref); 176 177 return 1; 178 } 179 180 181 246 } 182 247 183 248 sub replace_images {
Note:
See TracChangeset
for help on using the changeset viewer.