Changeset 7120 for trunk/gsdl


Ignore:
Timestamp:
2004-03-25T16:45:10+12:00 (20 years ago)
Author:
jrm21
Message:

Remove the title added by pdftohtml if the title is merely the filename.
This way HTMLPlug will give the document a more meaningful title based on
its first sentence.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/pdftohtml.pl

    r7018 r7120  
    163 163        $seen_textual_content=1;
    164 164        }
     165        # special - added to remove the filename from the title
     166        # this should be in the header, before we see "textual content"
     167        if ($line =~ m@<title>(.*?)</title>@) {
     168        my $title=$1;
     169        # is this title the name of a filename?
     170        if (-r "$title.pdf" || -r "$title.html") {
     171            # remove the title
     172            $line =~ s@<title>.*?</title>@<title></title>@;
     173        }
     174        }
    165 175    }
    166 176
     
    199 209    my $directory=$output_filestem;
    200 210    $directory =~ s@[^\/]*$@@;    # assume filename has no embedded slashes...
    201 
     211    # newer versions of pdftohtml don't seem to do images this way anymore?
    202 212    if (open (IMAGES, "${directory}images.log") ||
    203 213    open (IMAGES, "${directory}image.log")) {
Note: See TracChangeset for help on using the changeset viewer.