Changeset 32090

Show
Ignore:
Timestamp:
08.12.2017 20:16:36 (23 months ago)
Author:
ak19
Message:

Related to the previous commit, revision 32089. Some improvements.

Files:
2 modified

Legend:

Unmodified
Added
Removed
  • gs2-extensions/pdf-box/trunk/java/perllib/plugins/PDFBoxConverter.pm

    r32089 r32090  
    266266 
    267267    my $fulltext = &FileUtils::readUTF8File($target_file_path); 
    268     #$fulltext = &HTML::Entities::encode($fulltext); # doesn't seem to help 
    269     $fulltext =~ s@&@&amp;@sg; # Kathy's fix to ensure doc contents don't break XML 
    270     &FileUtils::writeUTF8File($target_file_path, \$fulltext); 
     268    if(defined $fulltext) { 
     269        #$fulltext = &HTML::Entities::encode($fulltext); # doesn't seem to help 
     270        $fulltext =~ s@&@&amp;@sg; # Kathy's fix to ensure doc contents don't break XML 
     271        &FileUtils::writeUTF8File($target_file_path, \$fulltext); 
     272    } else { 
     273        print STDERR "PDFBoxConverter::convert(): Unable to read from converted file\n"; 
     274        $had_error = 1; 
     275    } 
    271276    } 
    272277 
  • main/trunk/greenstone2/perllib/FileUtils.pm

    r32089 r32090  
    973973## @function readUTF8File() 
    974974# 
    975 # read contents from a file containing UTF8. 
    976 # 
    977 # Parameter filename, the filepath to read from 
     975# read contents from a file containing UTF8 using sysread, a fast implementation of file 'slurp' 
     976# 
     977# Parameter filename, the filepath to read from. 
     978# Returns undef if there was any trouble opening the file or reading from it. 
    978979# 
    979980sub readUTF8File 
     
    983984    print STDERR "@@@ Warning FileUtils::readFile() not yet implemented for parallel processing. Using regular version...\n"; 
    984985     
    985     open(FIN,"<$filename") or die "FileUtils::readFile: Unable to open $filename for reading...ERROR: $!\n"; 
     986    #open(FIN,"<$filename") or die "FileUtils::readFile: Unable to open $filename for reading...ERROR: $!\n"; 
     987 
     988    if(!open(FIN,"<$filename")) { 
     989    print STDERR "FileUtils::readFile: Unable to open $filename for reading...ERROR: $!\n"; 
     990    return undef; 
     991    } 
    986992 
    987993    # decode the bytes in the file with UTF8 enc, 
     
    989995    binmode(FIN,":utf8"); 
    990996     
    991     my $contents; 
     997    my $contents = undef; 
    992998    # Read in the entire contents of the file in one hit 
    993999    sysread(FIN, $contents, -s FIN);