Changeset 32090


Ignore:
Timestamp:
2017-12-08T20:16:36+13:00 (4 years ago)
Author:
ak19
Message:

Related to the previous commit, revision 32089. Some improvements.

Files:
2 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/pdf-box/trunk/java/perllib/plugins/PDFBoxConverter.pm

    r32089 r32090  
    266266
    267267    my $fulltext = &FileUtils::readUTF8File($target_file_path);
    268     #$fulltext = &HTML::Entities::encode($fulltext); # doesn't seem to help
    269     $fulltext =~ s@&@&amp;@sg; # Kathy's fix to ensure doc contents don't break XML
    270     &FileUtils::writeUTF8File($target_file_path, \$fulltext);
     268    if(defined $fulltext) {
     269        #$fulltext = &HTML::Entities::encode($fulltext); # doesn't seem to help
     270        $fulltext =~ s@&@&amp;@sg; # Kathy's fix to ensure doc contents don't break XML
     271        &FileUtils::writeUTF8File($target_file_path, \$fulltext);
     272    } else {
     273        print STDERR "PDFBoxConverter::convert(): Unable to read from converted file\n";
     274        $had_error = 1;
     275    }
    271276    }
    272277
  • main/trunk/greenstone2/perllib/FileUtils.pm

    r32089 r32090  
    973973## @function readUTF8File()
    974974#
    975 # read contents from a file containing UTF8.
    976 #
    977 # Parameter filename, the filepath to read from
     975# read contents from a file containing UTF8 using sysread, a fast implementation of file 'slurp'
     976#
     977# Parameter filename, the filepath to read from.
     978# Returns undef if there was any trouble opening the file or reading from it.
    978979#
    979980sub readUTF8File
     
    983984    print STDERR "@@@ Warning FileUtils::readFile() not yet implemented for parallel processing. Using regular version...\n";
    984985   
    985     open(FIN,"<$filename") or die "FileUtils::readFile: Unable to open $filename for reading...ERROR: $!\n";
     986    #open(FIN,"<$filename") or die "FileUtils::readFile: Unable to open $filename for reading...ERROR: $!\n";
     987
     988    if(!open(FIN,"<$filename")) {
     989    print STDERR "FileUtils::readFile: Unable to open $filename for reading...ERROR: $!\n";
     990    return undef;
     991    }
    986992
    987993    # decode the bytes in the file with UTF8 enc,
     
    989995    binmode(FIN,":utf8");
    990996   
    991     my $contents;
     997    my $contents = undef;
    992998    # Read in the entire contents of the file in one hit
    993999    sysread(FIN, $contents, -s FIN);
Note: See TracChangeset for help on using the changeset viewer.