Changeset 8795
- Timestamp:
- 2004-12-14T12:31:58+13:00 (19 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/PDFPlug.pm
r8716 r8795 150 150 151 151 # we have "<a name=1></a>" etc for each page 152 my @sections = split('<a name=', $$textref); 152 # it may be <A name= 153 my @sections = split('<[Aa] name=', $$textref); 153 154 154 155 if (scalar (@sections) == 1) { #only one section - no split! … … 161 162 162 163 my $title = $sections[0]; 163 $title =~ s/^\d+>//; # specific for pdftohtml... 164 $title =~ s/^\"?\w+\"?>//; # specific for pdftohtml... 164 165 $title =~ s/<\/([^>]+)><\1>//g; # (eg) </b><b> - no space 165 166 $title =~ s/<[^>]*>/ /g; … … 183 184 # add metadata per section... 184 185 foreach my $section (@sections) { 185 $section =~ s@^(\d+)></a>@@; # leftover from split expression... 186 # section names are not always just digits, may be like "outline" 187 $section =~ s@^\"?(\w+)\"?></a>@@; # leftover from split expression... 186 188 187 189 $title = $1; # Greenstone does magic if sections are titled digits 188 190 if (! defined($title) ) { 189 191 print STDERR "no title: $section\n"; 192 $title = " "; # get rid of the undefined warning in next line 190 193 } 191 194 my $newsection = "<!-- from PDFPlug -->\n<!-- <Section>\n"; … … 237 240 238 241 # Add NumPages metadata (we have "<a name=1>" etc for each page) 239 my @pages = ($$textref =~ /\<a name=\d+\>/ig); 242 my @pages = ($$textref =~ /\<[Aa] name=\"?\w+\"?>/ig); 243 $doc_obj->add_utf8_metadata($cursection, "NumPages", scalar(@pages)); 241 244 245 if ($self->{'use_sections'} && $self->{'converted_to'} eq "HTML") { 246 # we explicitly make it a paged document, cos greenstone won't get it 247 # right if any section has an empty title, or one with letters in it 248 $doc_obj->set_utf8_metadata_element ($cursection, "gsdlthistype", "Paged"); 249 } 242 250 return $result; 243 251 }
Note:
See TracChangeset
for help on using the changeset viewer.