Changeset 3410 for trunk/gsdl/bin


Ignore:
Timestamp:
2002-08-29T15:53:15+12:00 (22 years ago)
Author:
jrm21
Message:

Now post-filter the HTML so that the filename is not included in anchor
links that point within the same document (e.g. <a href="thisdoc.html#4"> is now just <a href="#4">).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/pdftohtml.pl

    r2976 r3410  
    75  75    my $input_filename = $ARGV[0];
    76  76    my $output_filestem = $ARGV[1];
    77        $output_filestem =~ s/\.html$//; # pdftohtml adds this suffix
        77    $output_filestem =~ s/\.html$//i; # pdftohtml adds this suffix
    78  78
    79  79    my @dir = split (/(\/|\\)/, $input_filename);
    80        pop(@dir);
        80    my $input_basename = pop(@dir);
        81    $input_basename =~ s/\.pdf//i;
    81  82    my $dir = join ("", @dir);
    82  83
     
    144  145    }
    145  146
         147    # relative hrefs to own document...
         148    $line =~ s@href=\"$input_basename\.html\#@href=\"\#@go;
    146  149# escape underscores, but not if they're inside tags (eg img/href names)
    147  150    my $inatag = 0; # allow multi-line tags
Note: See TracChangeset for help on using the changeset viewer.