Changeset 2715 for trunk/gsdl


Ignore:
Timestamp:
2001-08-23T18:00:26+12:00 (23 years ago)
Author:
jrm21
Message:

We now escape _ characters to protect against macro expansion, unless they
occur within HTML tags -- e.g. <img src="foo_bar.png">.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/pdftohtml.pl

    r2655 r2715  
    44###########################################################################
    55#
    6 # pdftohtml.pl -- convert documents to HTML or TEXT format
     6# pdftohtml.pl -- convert PDF documents to HTML format
    77#
    88# A component of the Greenstone digital library software
     
    1010# University of Waikato, New Zealand.
    1111#
    12 # Copyright (C) 1999 New Zealand Digital Library Project
     12# Copyright (C) 2001 New Zealand Digital Library Project
    1313#
    1414# This program is free software; you can redistribute it and/or modify
     
    119119    $cmd = "pdftohtml" if ($ENV{'GSDLOS'} =~ /^windows$/);
    120120
    121     if ($timeout) {$cmd = "ulimit -t $timeout; $cmd";}
    122121    $cmd .= " -noframes \"$input_filename\" \"$output_filestem.html\"";
    123122    $cmd .= " > \"$output_filestem.out\"";
     
    152151    $line =~ s#</b><b>##g;
    153152    $line =~ s#</i><i>##g;
    154     $line =~ s#\\#\\\\#g;
     153    $line =~ s#\\#\\\\#g; # until macro language parsing is fixed...
     154# escape underscores, but not if they're inside tags (eg img/href names)
     155    my $inatag = 0; # allow multi-line tags
     156    if ($line =~ /_/) {
     157        my @parts=split('_',$line);
     158        my $lastpart=pop @parts;
     159        foreach my $part (@parts) {
     160        if ($part =~ /<[^>]*$/) { # if we're starting a tag...
     161            $inatag=1;
     162        } elsif ($part =~ />[^<]*$/) { # closing a tag
     163            $inatag=0;
     164        }
     165        if ($inatag) {
     166            $part.='_';
     167        } else {
     168            $part.="&#95;";
     169        }
     170        }
     171        $line=join('',@parts,$lastpart);
     172    }
     173
    155174    print OUTFILE $line;
    156175    }
Note: See TracChangeset for help on using the changeset viewer.