Context Navigation

← Previous Change
Next Change →

Changeset 15152 for gsdl

Timestamp:

2008-03-29T17:53:51+13:00 (16 years ago)

Author:

ak19

Message:

Regular expression in make_links_to_assocdir_relative is corrected and a utility subroutine is added

File:

1 edited

gsdl/trunk/bin/script/gsConvert.pl (modified) (8 diffs)

Legend:

: Unmodified
: Added
: Removed

gsdl/trunk/bin/script/gsConvert.pl

-              r15120
+              r15152
         # Inserted this code to remove the images directory if it was still empty after
         # the html was generated (in case there were no images in the word document)
         if(is_dir_empty($assoc_dir)) {
         print STDERR "***gsConvert.pl: Image dir $assoc_dir is empty, removing***\n";
+        if(&is_dir_empty($assoc_dir)) {
+        #print STDERR "***gsConvert.pl: Image dir $assoc_dir is empty, removing***\n";
         &util::rm_r($assoc_dir);
         } else { # there was an image folder (it was generated)
 …
         # If the folder contains images
         # Replace them with relative links instead, so it can be moved elsewhere
         make_links_to_assocdir_relative($toppath, $docname, "$output_filestem.html", $assoc_dir, $docname."_files");
+        &make_links_to_assocdir_relative($toppath, $docname, "$output_filestem.html", $assoc_dir, $docname."_files");
+        }
         return 1;
 …
     return 0;
+}
+# Returns 1 if the directory at $path has no entries other than . and .., otherwise 0 (note that an empty directory still has non-zero size!!!)
+# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
+sub is_dir_empty
+{
+    my ($path) = @_;
+    opendir DIR, $path;  # NOTE(review): the return value of opendir is not checked here
+    while(my $entry = readdir DIR) {
+        next if($entry =~ /^\.\.?$/);  # skip the . and .. entries
+        closedir DIR;  # a real entry was found: close the handle before the early return
+        return 0;
+    }
+    closedir DIR;
+    return 1;  # loop ended without seeing any entry besides . and ..
+}
 …
     unless(open(FIN, "<$html_file")) {
     print STDERR "gsConvert.pl: Unable to open $html_file for reading absolute urls...ERROR\n";
     return;
+    return 0;
+    }
     # From http://perl.plover.com/local.html
 …
     # (Some people call this slurping the file.) Perl has a special feature to support this:
     # If the $/ variable is undefined, the <...> operator will read the entire file all at once"
+    $/ = undef;                 # Read entire file at once
+    my $html_contents = <FIN>;  # Now file is read in as one single 'line'
+    my $html_contents;
+    {
+    local $/ = undef;        # Read entire file at once
+    $html_contents = <FIN>;  # Now file is read in as one single 'line'
+    }
     close(FIN); # close the file
     print STDERR $html_contents;
+    #print STDERR $html_contents;
     # 2. Replace (substitute) *all* occurrences of the assoc_dir_path in a hrefs and img src
 …
     # all new lines as a regular space. This interacts with g to consider all the lines
     # together as a single line so that multi-occurrences can be replaced.
+    # we can't just replace $assoc_dir_path with $assoc_dir
+    # $assoc_dir_path represents a regular expression that needs to be replaced
+    # if it contains ., -, [ or ] -- which all have special meaning in Perl regular expressions --
+    # we need to escape these first
+    my $safe_reg_expression = $assoc_dir_path;
+    $safe_reg_expression =~ s/\./\\./g;
+    $safe_reg_expression =~ s/\-/\\-/g;
+    $safe_reg_expression =~ s/\[/\\[/g;
+    $safe_reg_expression =~ s/\]/\\]/g;
+    $safe_reg_expression =~ s/ /%20/g; # wvWare put %20 in place of space, so we need to change our prefix to match
     # The following regular expression substitution looks for <a or <image, followed by any other
     # attributes and values until it comes to the FIRST (indicated by ?) href= or src=
 …
     # and performs a global replace (g) meaning that all occurrences that match in that single line
     # are substituted.
+    $html_contents =~ s/(<(a|img).*?(href|src)=(\"|\')?)$assoc_dir_path(.*?(\"|\')?.*?>)/$1$assoc_dirname$5/sg;
+    #$html_contents =~ s/$assoc_dir_path/$assoc_dirname/gs; # this works, used as fall-back
+    $html_contents =~ s/(<(a|img).*?(href|src)=(\"|\')?)$safe_reg_expression(.*?(\"|\')?.*?>)/$1$assoc_dirname$5/sg;
+               #$html_contents =~ s/$safe_reg_expression/$assoc_dirname/gs; # this works, used as fall-back
+    # now replace any %20 chars in filenames of href or src attributes to use literal space ' '. Calls a function for this
+    $html_contents =~ s/(<(a|img).*?(href|src)=(\"|\')?)(.*)(.*?(\"|\')?.*?>)/&percent_twenty_to_space($1, $5, $6)/sge;
+    #print STDERR "assoc_dirname: ****$assoc_dirname***\n";
+    #print STDERR "safe_reg_expression: ****$safe_reg_expression***\n";
     # delete the original file and recreate it
     my $copy_of_filename = $html_file;
 …
     unless(open(FOUT, ">$html_file")) {  # open it as a new file for writing
     print STDERR "gsConvert.pl: Unable to open $html_file for writing relative links...ERROR\n";
     return;
+    return 0;
+    }
     # write out the updated contents and close the file
     print FOUT $html_contents;
     close(FOUT);
+}
+# A method to check if directory is empty (note that an empty directory still has non-zero size!!!)
+# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
+sub is_dir_empty
+    return 1;
+}
+# Utility routine to convert all %20 introduced by wvWare in link pathnames into space again
+sub percent_twenty_to_space
+{
+    my ($path) = @_;
+    opendir DIR, $path;
+    while(my $entry = readdir DIR) {
+        next if($entry =~ /^\.\.?$/);
+        closedir DIR;
+        return 0;
+    }
+    closedir DIR;
+    return 1;
+}
+    my ($pre, $text, $post) = @_;
+    $text =~ s/%20/ /g;
+    return "$pre$text$post";
+}
 # Attempt to convert a word document to html with the word2html scripting program

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 15152 for gsdl

Legend:

gsdl/trunk/bin/script/gsConvert.pl

Download in other formats: