Changeset 2715 for trunk/gsdl
- Timestamp:
- 2001-08-23T18:00:26+12:00 (23 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/pdftohtml.pl
r2655  r2715
    4      4    ###########################################################################
    5      5    #
    6         - # pdftohtml.pl -- convert documents to HTML or TEXT format
           6  + # pdftohtml.pl -- convert PDF documents to HTML format
    7      7    #
    8      8    # A component of the Greenstone digital library software
    …      …
   10     10    # University of Waikato, New Zealand.
   11     11    #
   12         - # Copyright (C) 1999 New Zealand Digital Library Project
          12  + # Copyright (C) 2001 New Zealand Digital Library Project
   13     13    #
   14     14    # This program is free software; you can redistribute it and/or modify
    …      …
  119    119    $cmd = "pdftohtml" if ($ENV{'GSDLOS'} =~ /^windows$/);
  120    120
  121         - if ($timeout) {$cmd = "ulimit -t $timeout; $cmd";}
  122    121    $cmd .= " -noframes \"$input_filename\" \"$output_filestem.html\"";
  123    122    $cmd .= " > \"$output_filestem.out\"";
    …      …
  152    151    $line =~ s#</b><b>##g;
  153    152    $line =~ s#</i><i>##g;
  154         - $line =~ s#\\#\\\\#g;
         153  + $line =~ s#\\#\\\\#g; # until macro language parsing is fixed...
         154  + # escape underscores, but not if they're inside tags (eg img/href names)
         155  + my $inatag = 0; # allow multi-line tags
         156  + if ($line =~ /_/) {
         157  +     my @parts=split('_',$line);
         158  +     my $lastpart=pop @parts;
         159  +     foreach my $part (@parts) {
         160  +         if ($part =~ /<[^>]*$/) { # if we're starting a tag...
         161  +             $inatag=1;
         162  +         } elsif ($part =~ />[^<]*$/) { # closing a tag
         163  +             $inatag=0;
         164  +         }
         165  +         if ($inatag) {
         166  +             $part.='_';
         167  +         } else {
         168  +             $part.="&#95;";
         169  +         }
         170  +     }
         171  +     $line=join('',@parts,$lastpart);
         172  + }
         173  +
  155    174    print OUTFILE $line;
  156    175    }
Note:
See TracChangeset
for help on using the changeset viewer.