Changeset 15839

Show
Ignore:
Timestamp:
30.05.2008 19:39:00 (11 years ago)
Author:
ak19
Message:

Corrected a bug that caused failures on PDFs with funny characters. Instead of moving the created html file (copy & delete original), the script now reads the original file's contents, ensures they are UTF-8, and writes them out to the new file. Replacing srcdocs with html now succeeds in such cases, just as it would for PDFs processed by PDFPlug.pm.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/bin/script/replace_srcdoc_with_html.pl

    r15169 r15839  
    187187    } 
    188188 
    189     # Now we know we have no file name collisions 
    190     # use util to move it all to the import directory where the original file is located 
    191        #&util::mv($output_filename, &util::filename_cat($import_dir, $tmp_name.$ext)); # doesn't work 
    192        #&util::mv($output_filename, $import_dir); # doesn't work 
    193     &util::cp($output_filename, $import_dir); 
     189    # Now we know we have no file name collisions. We 'move' the html file by copying its 
     190    # contents over and ensuring that these contents are utf8. If we don't do this, PDFs 
     191    # replaced by html may fail, whereas those converted with PDFPlug would have succeeded. 
     192    open(FIN,"<$output_filename") or die "replace_srcdoc_with_html.pl: Unable to open $output_filename to ensure utf8...ERROR: $!\n"; 
     193    my $html_contents; 
     194    { 
     195    local $/ = undef;        # Read entire file at once 
     196    $html_contents = <FIN>;  # Now file is read in as one single 'line' 
     197    unicode::ensure_utf8(\$html_contents); # turn any high bytes that aren't valid utf-8 into utf-8. 
     198    } 
     199    close(FIN);  
     200 
     201    # write the utf8 contents to the new file and delete the original. 
     202    open(FOUT, ">$new_file") or die "replace_srcdoc_with_html.pl: Unable to open $new_file for writing out utf8 html...ERROR: $!\n"; 
     203    print FOUT $html_contents; 
     204    close(FOUT); 
    194205    &util::rm($output_filename); 
    195     if(-e $assoc_folder) { # move any associated folders too 
     206     
     207    # move any associated folders containing associated files too 
     208    if(-e $assoc_folder) {  
    196209    #print STDERR "****Folder for associated files is $assoc_folder\n"; 
    197210        #&util::mv($assoc_folder, $import_dir); # doesn't work for me