Changeset 32089 for gs2-extensions
- Timestamp:
- 2017-12-08T19:16:12+13:00 (6 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/pdf-box/trunk/java/perllib/plugins/PDFBoxConverter.pm
r27510 r32089 32 32 no strict 'subs'; # allow barewords (eg STDERR) as function arguments 33 33 34 #use HTML::Entities; # for encoding characters into their HTML entities when PDFBox converts to text 35 34 36 use gsprintf 'gsprintf'; 37 use FileUtils; 35 38 36 39 # these two variables mustn't be initialised here or they will get stuck … … 257 260 #print STDERR "**** item file: $target_file_path\n"; 258 261 } 259 262 elsif ($self->{'converted_to'} eq "text") { 263 # ensure html entities are doubly escaped for pdfbox to text conversion: & -> &amp; 264 # conversion to html does it automatically, but conversion to text doesn't 265 # and this results in illegal characters in doc.xml 266 267 my $fulltext = &FileUtils::readUTF8File($target_file_path); 268 #$fulltext = &HTML::Entities::encode($fulltext); # doesn't seem to help 269 $fulltext =~ s@&@&amp;@sg; # Kathy's fix to ensure doc contents don't break XML 270 &FileUtils::writeUTF8File($target_file_path, \$fulltext); 271 } 272 260 273 if ($had_error) { 261 274 return (0, $result,$target_file_path);
Note:
See TracChangeset
for help on using the changeset viewer.