Changeset 7120 for trunk/gsdl
- Timestamp: 2004-03-25T16:45:10+12:00 (20 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/pdftohtml.pl
r7018  r7120
163    163    $seen_textual_content=1;
164    164    }
       165    # special - added to remove the filename from the title
       166    # this should be in the header, before we see "textual content"
       167    if ($line =~ m@<title>(.*?)</title>@) {
       168    my $title=$1;
       169    # is this title the name of a filename?
       170    if (-r "$title.pdf" || -r "$title.html") {
       171    # remove the title
       172    $line =~ s@<title>.*?</title>@<title></title>@;
       173    }
       174    }
165    175    }
166    176
…      …
199    209    my $directory=$output_filestem;
200    210    $directory =~ s@[^\/]*$@@; # assume filename has no embedded slashes...
201    211    # newer versions of pdftohtml don't seem to do images this way anymore?
202    212    if (open (IMAGES, "${directory}images.log") ||
203    213    open (IMAGES, "${directory}image.log")) {
Note: See TracChangeset for help on using the changeset viewer.