Changeset 32090
- Timestamp:
- 2017-12-08T20:16:36+13:00 (6 years ago)
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/pdf-box/trunk/java/perllib/plugins/PDFBoxConverter.pm
r32089 r32090 266 266 267 267 my $fulltext = &FileUtils::readUTF8File($target_file_path); 268 #$fulltext = &HTML::Entities::encode($fulltext); # doesn't seem to help 269 $fulltext =~ s@&@&amp;@sg; # Kathy's fix to ensure doc contents don't break XML 270 &FileUtils::writeUTF8File($target_file_path, \$fulltext); 268 if(defined $fulltext) { 269 #$fulltext = &HTML::Entities::encode($fulltext); # doesn't seem to help 270 $fulltext =~ s@&@&amp;@sg; # Kathy's fix to ensure doc contents don't break XML 271 &FileUtils::writeUTF8File($target_file_path, \$fulltext); 272 } else { 273 print STDERR "PDFBoxConverter::convert(): Unable to read from converted file\n"; 274 $had_error = 1; 275 } 271 276 } 272 277 -
main/trunk/greenstone2/perllib/FileUtils.pm
r32089 r32090 973 973 ## @function readUTF8File() 974 974 # 975 # read contents from a file containing UTF8. 976 # 977 # Parameter filename, the filepath to read from 975 # read contents from a file containing UTF8 using sysread, a fast implementation of file 'slurp' 976 # 977 # Parameter filename, the filepath to read from. 978 # Returns undef if there was any trouble opening the file or reading from it. 978 979 # 979 980 sub readUTF8File … … 983 984 print STDERR "@@@ Warning FileUtils::readFile() not yet implemented for parallel processing. Using regular version...\n"; 984 985 985 open(FIN,"<$filename") or die "FileUtils::readFile: Unable to open $filename for reading...ERROR: $!\n"; 986 #open(FIN,"<$filename") or die "FileUtils::readFile: Unable to open $filename for reading...ERROR: $!\n"; 987 988 if(!open(FIN,"<$filename")) { 989 print STDERR "FileUtils::readFile: Unable to open $filename for reading...ERROR: $!\n"; 990 return undef; 991 } 986 992 987 993 # decode the bytes in the file with UTF8 enc, … … 989 995 binmode(FIN,":utf8"); 990 996 991 my $contents ;997 my $contents = undef; 992 998 # Read in the entire contents of the file in one hit 993 999 sysread(FIN, $contents, -s FIN);
Note:
See TracChangeset
for help on using the changeset viewer.