Changeset 15152


Ignore:
Timestamp:
2008-03-29T17:53:51+13:00 (14 years ago)
Author:
ak19
Message:

Regular expression in make_links_to_assocdir_relative is corrected and a utility subroutine is added

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/bin/script/gsConvert.pl

    r15120 r15152  
    614614        # Inserted this code to remove the images directory if it was still empty after
    615615        # the html was generated (in case there were no images in the word document)
    616         if(is_dir_empty($assoc_dir)) {
    617         print STDERR "***gsConvert.pl: Image dir $assoc_dir is empty, removing***\n";
     616        if(&is_dir_empty($assoc_dir)) {
     617        #print STDERR "***gsConvert.pl: Image dir $assoc_dir is empty, removing***\n";
    618618        &util::rm_r($assoc_dir);
    619619        } else { # there was an image folder (it was generated)
     
    621621        # If the folder contains images
    622622        # Replace them with relative links instead, so it can be moved elsewhere
    623         make_links_to_assocdir_relative($toppath, $docname, "$output_filestem.html", $assoc_dir, $docname."_files");   
     623        &make_links_to_assocdir_relative($toppath, $docname, "$output_filestem.html", $assoc_dir, $docname."_files");   
    624624        }
    625625        return 1;
     
    640640   
    641641    return 0;
     642}
     643
     644
     645# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
     646# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
     647sub is_dir_empty
     648{
     649    my ($path) = @_;
     650    opendir DIR, $path;
     651    while(my $entry = readdir DIR) {
     652        next if($entry =~ /^\.\.?$/);
     653        closedir DIR;
     654        return 0;
     655    }
     656    closedir DIR;
     657    return 1;
    642658}
    643659
     
    661677    unless(open(FIN, "<$html_file")) {
    662678    print STDERR "gsConvert.pl: Unable to open $html_file for reading absolute urls...ERROR\n";
    663     return;
     679    return 0;
    664680    }
    665681    # From http://perl.plover.com/local.html
     
    667683    # (Some people call this slurping the file.) Perl has a special feature to support this:
    668684    # If the $/ variable is undefined, the <...> operator will read the entire file all at once"
    669     $/ = undef;                 # Read entire file at once
    670     my $html_contents = <FIN>;  # Now file is read in as one single 'line'
     685    my $html_contents;
     686    {
     687    local $/ = undef;        # Read entire file at once
     688    $html_contents = <FIN>;  # Now file is read in as one single 'line'
     689    }
    671690    close(FIN); # close the file
    672     print STDERR $html_contents;
     691    #print STDERR $html_contents;
    673692   
    674693    # 2. Replace (substitute) *all* occurrences of the assoc_dir_path in a hrefs and img src
     
    677696    # all new lines as a regular space. This interacts with g to consider all the lines
    678697    # together as a single line so that multi-occurrences can be replaced.
    679    
     698
     699    # we can't just replace $assoc_dir_path with $assoc_dir
     700    # $assoc_dir_path represents a regular expression that needs to be replaced
     701    # if it contains ., -, [ or ] -- which all have special meaning in Perl regular expressions --
     702    # we need to escape these first
     703    my $safe_reg_expression = $assoc_dir_path;
     704    $safe_reg_expression =~ s/\./\\./g;
     705    $safe_reg_expression =~ s/\-/\\-/g;
     706    $safe_reg_expression =~ s/\[/\\[/g;
     707    $safe_reg_expression =~ s/\]/\\]/g;
     708    $safe_reg_expression =~ s/ /%20/g; # wvWare put %20 in place of space, so we need to change our prefix to match
     709
    680710    # The following regular expression substitution looks for <a or <img, followed by any other
    681711    # attributes and values until it comes to the FIRST (indicated by ?) href= or src=
     
    689719    # and performs a global replace (g) meaning that all occurrences that match in that single line
    690720    # are substituted.
    691     $html_contents =~ s/(<(a|img).*?(href|src)=(\"|\')?)$assoc_dir_path(.*?(\"|\')?.*?>)/$1$assoc_dirname$5/sg;
    692     #$html_contents =~ s/$assoc_dir_path/$assoc_dirname/gs; # this works, used as fall-back
    693 
     721    $html_contents =~ s/(<(a|img).*?(href|src)=(\"|\')?)$safe_reg_expression(.*?(\"|\')?.*?>)/$1$assoc_dirname$5/sg;
     722               #$html_contents =~ s/$safe_reg_expression/$assoc_dirname/gs; # this works, used as fall-back
     723    # now replace any %20 chars in filenames of href or src attributes to use literal space ' '. Calls a function for this
     724    $html_contents =~ s/(<(a|img).*?(href|src)=(\"|\')?)(.*)(.*?(\"|\')?.*?>)/&percent_twenty_to_space($1, $5, $6)/sge;
     725   
     726    #print STDERR "assoc_dirname: ****$assoc_dirname***\n";
     727    #print STDERR "safe_reg_expression: ****$safe_reg_expression***\n";
     728   
    694729    # delete the original file and recreate it
    695730    my $copy_of_filename = $html_file;
     
    699734    unless(open(FOUT, ">$html_file")) {  # open it as a new file for writing
    700735    print STDERR "gsConvert.pl: Unable to open $html_file for writing relative links...ERROR\n";
    701     return;
     736    return 0;
    702737    }
    703738    # write out the updated contents and close the file
    704739    print FOUT $html_contents;
    705740    close(FOUT);
    706    
    707 }
    708 
    709 # A method to check if directory is empty (note that an empty directory still has non-zero size!!!)
    710 # Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
    711 sub is_dir_empty
     741    return 1;
     742}
     743
     744# Utility routine to convert all %20 introduced by wvWare in link pathnames into space again
     745sub percent_twenty_to_space
    712746{
    713     my ($path) = @_;
    714     opendir DIR, $path;
    715     while(my $entry = readdir DIR) {
    716         next if($entry =~ /^\.\.?$/);
    717         closedir DIR;
    718         return 0;
    719     }
    720     closedir DIR;
    721     return 1;
    722 }
    723 
     747    my ($pre, $text, $post) = @_;
     748
     749    $text =~ s/%20/ /g;
     750
     751    return "$pre$text$post";
     752}
    724753
    725754# Attempt to convert a word document to html with the word2html scripting program
Note: See TracChangeset for help on using the changeset viewer.