- Timestamp:
- 2008-03-29T17:53:51+13:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/bin/script/gsConvert.pl
r15120 r15152 614 614 # Inserted this code to remove the images directory if it was still empty after 615 615 # the html was generated (in case there were no images in the word document) 616 if( is_dir_empty($assoc_dir)) {617 print STDERR "***gsConvert.pl: Image dir $assoc_dir is empty, removing***\n";616 if(&is_dir_empty($assoc_dir)) { 617 #print STDERR "***gsConvert.pl: Image dir $assoc_dir is empty, removing***\n"; 618 618 &util::rm_r($assoc_dir); 619 619 } else { # there was an image folder (it was generated) … … 621 621 # If the folder contains images 622 622 # Replace them with relative links instead, so it can be moved elsewhere 623 make_links_to_assocdir_relative($toppath, $docname, "$output_filestem.html", $assoc_dir, $docname."_files");623 &make_links_to_assocdir_relative($toppath, $docname, "$output_filestem.html", $assoc_dir, $docname."_files"); 624 624 } 625 625 return 1; … … 640 640 641 641 return 0; 642 } 643 644 645 # A method to check if a directory is empty (note that an empty directory still has non-zero size!!!) 646 # Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831 647 sub is_dir_empty 648 { 649 my ($path) = @_; 650 opendir DIR, $path; 651 while(my $entry = readdir DIR) { 652 next if($entry =~ /^\.\.?$/); 653 closedir DIR; 654 return 0; 655 } 656 closedir DIR; 657 return 1; 642 658 } 643 659 … … 661 677 unless(open(FIN, "<$html_file")) { 662 678 print STDERR "gsConvert.pl: Unable to open $html_file for reading absolute urls...ERROR\n"; 663 return ;679 return 0; 664 680 } 665 681 # From http://perl.plover.com/local.html … … 667 683 # (Some people call this slurping the file.) Perl has a special feature to support this: 668 684 # If the $/ variable is undefined, the <...> operator will read the entire file all at once" 669 $/ = undef; # Read entire file at once 670 my $html_contents = <FIN>; # Now file is read in as one single 'line' 685 my $html_contents; 686 { 687 local $/ = undef; # Read entire file at once 688 $html_contents = <FIN>; # Now file is read in as one single 'line' 689 } 671 690 close(FIN); # close the file 672 print STDERR $html_contents;691 #print STDERR $html_contents; 673 692 674 693 # 2. Replace (substitute) *all* ocurrences of the assoc_dir_path in a hrefs and img src … … 677 696 # all new lines as a regular space. This interacts with g to consider all the lines 678 697 # together as a single line so that multi-occurrences can be replaced. 679 698 699 # we can't just replace $assoc_dir_path with $assoc_dir 700 # $assoc_dir_path represents a regular expression that needs to be replaced 701 # if it contains ., -, [ or ] -- which all have special meaning in Perl regular expressions -- 702 # we need to escape these first 703 my $safe_reg_expression = $assoc_dir_path; 704 $safe_reg_expression =~ s/\./\\./g; 705 $safe_reg_expression =~ s/\-/\\-/g; 706 $safe_reg_expression =~ s/\[/\\[/g; 707 $safe_reg_expression =~ s/\]/\\]/g; 708 $safe_reg_expression =~ s/ /%20/g; # wvWare put %20 in place of space, so we need to change our prefix to match 709 680 710 # The following regular expression substitution looks for <a or <image, followed by any other 681 711 # attributes and values until it comes to the FIRST (indicated by ?) href= or src= … … 689 719 # and performs a global replace (g) meaning that all occurrences that match in that single line 690 720 # are substituted. 691 $html_contents =~ s/(<(a|img).*?(href|src)=(\"|\')?)$assoc_dir_path(.*?(\"|\')?.*?>)/$1$assoc_dirname$5/sg; 692 #$html_contents =~ s/$assoc_dir_path/$assoc_dirname/gs; # this works, used as fall-back 693 721 $html_contents =~ s/(<(a|img).*?(href|src)=(\"|\')?)$safe_reg_expression(.*?(\"|\')?.*?>)/$1$assoc_dirname$5/sg; 722 #$html_contents =~ s/$safe_reg_expression/$assoc_dirname/gs; # this works, used as fall-back 723 # now replace any %20 chars in filenames of href or src attributes to use literal space ' '. Calls a function for this 724 $html_contents =~ s/(<(a|img).*?(href|src)=(\"|\')?)(.*)(.*?(\"|\')?.*?>)/&percent_twenty_to_space($1, $5, $6)/sge; 725 726 #print STDERR "assoc_dirname: ****$assoc_dirname***\n"; 727 #print STDERR "safe_reg_expression: ****$safe_reg_expression***\n"; 728 694 729 # delete the original file and recreate it 695 730 my $copy_of_filename = $html_file; … … 699 734 unless(open(FOUT, ">$html_file")) { # open it as a new file for writing 700 735 print STDERR "gsConvert.pl: Unable to open $html_file for writing relative links...ERROR\n"; 701 return ;736 return 0; 702 737 } 703 738 # write out the updated contents and close the file 704 739 print FOUT $html_contents; 705 740 close(FOUT); 706 707 } 708 709 # A method to check if directory is empty (note that an empty directory still has non-zero size!!!) 710 # Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831 711 sub is_dir_empty 741 return 1; 742 } 743 744 # Utility routine to convert all %20 introduced by wvWare in link pathnames into space again 745 sub percent_twenty_to_space 712 746 { 713 my ($path) = @_; 714 opendir DIR, $path; 715 while(my $entry = readdir DIR) { 716 next if($entry =~ /^\.\.?$/); 717 closedir DIR; 718 return 0; 719 } 720 closedir DIR; 721 return 1; 722 } 723 747 my ($pre, $text, $post) = @_; 748 749 $text =~ s/%20/ /g; 750 751 return "$pre$text$post"; 752 } 724 753 725 754 # Attempt to convert a word document to html with the word2html scripting program
Note:
See TracChangeset
for help on using the changeset viewer.