Changeset 23347
- Timestamp: 2010-11-26T09:43:59+13:00 (13 years ago)
- Location: main/trunk/greenstone2/perllib/plugins
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone2/perllib/plugins/BasePlugin.pm
r23335 r23347 530 530 # check if the filename is already in UTF8. If it is, then we're done. 531 531 if($filename_encoding =~ m/auto/) { 532 if(&unicode::check_is_utf8($filemeta))533 {534 535 536 }532 if(&unicode::check_is_utf8($filemeta)) 533 { 534 $filename_encoding = "utf8"; 535 return $filemeta; 536 } 537 537 } 538 538 … … 540 540 if ($filename_encoding eq "auto") 541 541 { 542 # try textcat543 $filename_encoding = $self->textcat_encoding($filemeta);542 # try textcat 543 $filename_encoding = $self->textcat_encoding($filemeta); 544 544 545 # check the locale next546 $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined";545 # check the locale next 546 $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined"; 547 547 548 549 # now try the encoding of the document, if available550 if ($filename_encoding eq "undefined" && defined $file_encoding) {551 552 }548 549 # now try the encoding of the document, if available 550 if ($filename_encoding eq "undefined" && defined $file_encoding) { 551 $filename_encoding = $file_encoding; 552 } 553 553 554 554 } … … 633 633 634 634 my $outhandle = $self->{'outhandle'}; 635 636 print $outhandle "****!!!!**** BasePlugin::filename_to_utf8_metadata now deprecated\n"; 637 my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0); 638 print $outhandle "Calling method: $cfilename:$cline $cpackage->$csubr\n"; 639 635 640 636 641 my ($filemeta) = $file =~ /([^\\\/]+)$/; # getting the tail of the filepath (skips all string parts containing slashes upto the end) … … 791 796 { 792 797 my $self = shift (@_); 793 my ($file,$metadata ) = @_;798 my ($file,$metadata,$plugin_filename_encoding) = @_; 794 799 795 800 my $gs_filename_encoding = $metadata->{"gs.filename_encoding"}; … … 798 803 # Start by looking for manually assigned metadata 799 804 if (defined $gs_filename_encoding) { 800 if (ref ($gs_filename_encoding) eq "ARRAY") { 801 my $outhandle = $self->{'outhandle'}; 802 
803 $deduced_filename_encoding = $gs_filename_encoding->[0]; 804 805 my $num_vals = scalar(@$gs_filename_encoding); 806 if ($num_vals>1) { 807 print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n"; 808 print $outhandle " Selecting first value: $deduced_filename_encoding\n"; 809 } 810 } 811 else { 812 $deduced_filename_encoding = $gs_filename_encoding; 813 } 814 } 815 816 # binmode(STDERR,":utf8"); 817 818 # print STDERR "**** file = $file\n"; 819 # print STDERR "**** debug file = ", &unicode::debug_unicode_string($file),"\n";; 820 821 # print STDERR "******* dfe = $deduced_filename_encoding\n"; 822 805 if (ref ($gs_filename_encoding) eq "ARRAY") { 806 my $outhandle = $self->{'outhandle'}; 807 808 $deduced_filename_encoding = $gs_filename_encoding->[0]; 809 810 my $num_vals = scalar(@$gs_filename_encoding); 811 if ($num_vals>1) { 812 print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n"; 813 print $outhandle " Selecting first value: $deduced_filename_encoding\n"; 814 } 815 } 816 else { 817 $deduced_filename_encoding = $gs_filename_encoding; 818 } 819 } 820 823 821 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 824 # Look to see if plugin specifies this value 822 # Look to see if plugin specifies this value 823 824 if (defined $plugin_filename_encoding) { 825 # First look to see if we're using any of the "older" (i.e. deprecated auto-... plugin options) 826 if ($plugin_filename_encoding =~ m/^auto-.*$/) { 827 my $outhandle = $self->{'outhandle'}; 828 print $outhandle "Warning: $plugin_filename_encoding is no longer supported\n"; 829 print $outhandle " default to 'auto'\n"; 830 $self->{'filename_encoding'} = $plugin_filename_encoding = "auto"; 831 } 832 833 if ($plugin_filename_encoding ne "auto") { 834 # We've been given a specific filenamne encoding 835 # => so use it! 
836 $deduced_filename_encoding = $plugin_filename_encoding; 837 } 838 } 825 839 } 826 840 827 841 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 828 # See if we can determine the file system encoding through locale 829 # Unix only ? 842 # See if we can determine the file system encoding through locale 843 $deduced_filename_encoding = $self->locale_encoding(); 844 845 # if locale shows us filesystem is utf8, check to see filename is consistent 846 # => if not, then we have an "alien" filename on our hands 847 848 if ($deduced_filename_encoding =~ m/^utf-?8$/i) { 849 if (!&unicode::check_is_utf8($file)) { 850 # "alien" filename, so revert 851 $deduced_filename_encoding = undef; 852 } 853 } 854 } 855 856 857 # if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 858 # # Last chance, apply textcat to deduce filename encoding 859 # $deduced_filename_encoding = $self->textcat_encoding($file); 860 # } 861 862 if ($self->{'verbosity'}>3) { 863 my $outhandle = $self->{'outhandle'}; 864 865 if (defined $deduced_filename_encoding) { 866 print $outhandle " Deduced filename encoding as: $deduced_filename_encoding\n"; 867 } 868 else { 869 print $outhandle " No filename encoding deduced\n"; 870 } 871 } 830 872 831 # if locale shows us filesystem is utf8, check to see filename is consistent832 # => if not, then we have an "alien" filename on our hands833 }834 835 836 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {837 # Last chance, apply textcat to deduce filename encoding838 }839 840 873 return $deduced_filename_encoding; 841 874 } … … 861 894 862 895 # UTF-8 version of filename 863 print STDERR "**** setting Source Metadata given: $raw_file\n"; 864 865 ## my $filemeta = $self->filename_to_utf8_metadata($raw_file, $filename_encoding); 896 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 897 print STDERR "****** Setting Source Metadata given: 
$raw_file\n"; 898 } 866 899 867 900 my $url_encoded_filename; 868 901 if (defined $filename_encoding) { 869 # => Generate a pretty print version of filename that is mapped to Unicode 870 871 # Use filename_encoding to map raw filename to a Perl unicode-aware string 872 $url_encoded_filename = decode($filename_encoding,$raw_file); 873 874 print STDERR "@@@@ pretty print using $filename_encoding: ", encode("utf8",$url_encoded_filename),"\n"; 902 # => Generate a pretty print version of filename that is mapped to Unicode 903 904 # Use filename_encoding to map raw filename to a Perl unicode-aware string 905 $url_encoded_filename = decode($filename_encoding,$raw_file); 875 906 } 876 907 else { 877 # otherwise generate %xx encoded version of filename for char > 127 878 $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file); 879 } 880 881 print STDERR "***** saving Source as: $url_encoded_filename\n"; 908 # otherwise generate %xx encoded version of filename for char > 127 909 $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file); 910 } 911 912 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 913 print STDERR "***** saving Source as: $url_encoded_filename\n"; 914 } 882 915 883 916 … … 893 926 $renamed_raw_url); 894 927 895 print STDERR "***** saving SourceFile as: $renamed_raw_url\n"; 928 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 929 print STDERR "***** saving SourceFile as: $renamed_raw_url\n"; 930 } 896 931 } 897 932 … … 953 988 954 989 955 my $filename_encoding = $self->deduce_filename_encoding($file,$metadata); 990 my $plugin_filename_encoding = $self->{'filename_encoding'}; 991 my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 956 992 $self->set_Source_metadata($doc_obj,$filename_no_path,$filename_encoding); 957 993 -
main/trunk/greenstone2/perllib/plugins/HTMLPlugin.pm
r23335 r23347 379 379 380 380 my ($tailname,$dirname) = &File::Basename::fileparse($file); 381 print STDERR "***!! file = $file\n"; 381 382 382 # my $utf8_file = $self->filename_to_utf8_metadata($file); 383 383 # $utf8_file =~ s/&\#095;/_/g; 384 384 my $utf8_file = &unicode::raw_filename_to_url_encoded($tailname); 385 print STDERR "***!! utf8_file = $utf8_file\n"; 385 386 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 387 print STDERR "***!! file = $file\n"; 388 print STDERR "***!! utf8_file = $utf8_file\n"; 389 } 390 386 391 387 392 my $web_url = "http://"; … … 758 763 759 764 # If web page didn't give encoding, then default to utf8 760 print "*************** looking up $file\n"; 765 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 766 print STDERR "*** Web page didn't give encoding, defaulting to UTF8!\n"; 767 print STDERR "***** looking up $file\n"; 768 } 761 769 762 770 my $content_encoding= $self->{'content_encoding'} || "utf8"; … … 767 775 768 776 &ghtml::urlsafe ($href); 769 print STDERR "***!!! href=$href\n"; 777 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 778 print STDERR "***!!! href=$href\n"; 779 } 780 770 781 771 782 return $front . "_httpextlink_&rl=" . $rl . "&href=" . $href . $hash_part . $back; … … 818 829 if (!-e $filename) { 819 830 # try the original filename stored in map 820 print STDERR "***###!! orig filename did not exist: $filename\n"; 831 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 832 print STDERR "***###!! 
orig filename did not exist: $filename\n"; 833 } 821 834 822 835 my $original_filename = $self->{'utf8_to_original_filename'}->{$utf8_filename}; 823 836 824 print STDERR "**** Trying for $original_filename\n"; 837 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 838 print STDERR "**** Trying for $original_filename\n"; 839 } 825 840 826 841 if (defined $original_filename && -e $original_filename) { 827 print STDERR "*** found match\n"; 842 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 843 print STDERR "*** found match\n"; 844 } 828 845 $filename = $original_filename; 829 846 } … … 1164 1181 $title =~ s/^$self->{'title_sub'}// if ($self->{'title_sub'}); 1165 1182 $title =~ s/^\s+//s; # in case title_sub introduced any... 1166 print STDERR "**** adding Title: ", Encode::encode("utf8",$title), "\n";1167 1183 $doc_obj->add_utf8_metadata ($section, "Title", $title); 1168 1184 print $outhandle " extracted Title metadata \"$title\" from $from\n" -
main/trunk/greenstone2/perllib/plugins/ImagePlugin.pm
r23335 r23347 112 112 my $outhandle = $self->{'outhandle'}; 113 113 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 114 114 115 115 if ($self->{'image_conversion_available'} == 1) 116 116 { 117 my $filename_encoding = $self->deduce_filename_encoding($file,$metadata); 118 119 # my $utf8_filename_no_path = $self->filepath_to_utf8($filename_no_path); 120 # my $url_encoded_filename = &util::rename_file($utf8_filename_no_path, $self->{'file_rename_method'}); 121 122 # $self->generate_images($filename_full_path, $url_encoded_filename, 123 # $doc_obj, $doc_obj->get_top_section()); # should we check the return value? 124 125 $filename_no_path = &unicode::raw_filename_to_url_encoded($filename_no_path); 126 127 # should we check the return value? 128 $self->generate_images($filename_full_path, $filename_no_path, 129 $doc_obj, $doc_obj->get_top_section(),$filename_encoding); 130 117 my $plugin_filename_encoding = $self->{'filename_encoding'}; 118 my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 119 120 my $url_encoded_filename = &unicode::raw_filename_to_url_encoded($filename_no_path); 121 122 # should we check the return value? 
123 $self->generate_images($filename_full_path, $url_encoded_filename, 124 $doc_obj, $doc_obj->get_top_section(),$filename_encoding); 125 131 126 } 132 127 else 133 128 { 134 if ($gli) {135 136 }137 # all we do is add the original image as an associated file, and set up srclink etc138 my $assoc_file = $doc_obj->get_assocfile_from_sourcefile();139 my $section = $doc_obj->get_top_section();140 141 $doc_obj->associate_file($filename_full_path, $assoc_file, "", $section);142 143 $doc_obj->add_metadata ($section, "srclink_file", $doc_obj->get_sourcefile());144 # We don't know the size of the image, but the browser should display it at full size145 $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[srclink_file]\">");146 147 # Add a fake thumbnail icon with the full-sized image scaled down by the browser148 $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[srclink_file]\" width=\"" . $self->{'thumbnailsize'} . 
"\">");129 if ($gli) { 130 &gsprintf(STDERR, "<Warning p='ImagePlugin' r='{ImageConverter.noconversionavailable}: {ImageConverter.".$self->{'no_image_conversion_reason'}."}'>"); 131 } 132 # all we do is add the original image as an associated file, and set up srclink etc 133 my $assoc_file = $doc_obj->get_assocfile_from_sourcefile(); 134 my $section = $doc_obj->get_top_section(); 135 136 $doc_obj->associate_file($filename_full_path, $assoc_file, "", $section); 137 138 $doc_obj->add_metadata ($section, "srclink_file", $doc_obj->get_sourcefile()); 139 # We don't know the size of the image, but the browser should display it at full size 140 $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[srclink_file]\">"); 141 142 # Add a fake thumbnail icon with the full-sized image scaled down by the browser 143 $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[srclink_file]\" width=\"" . $self->{'thumbnailsize'} . "\">"); 149 144 } 150 145 #we have no text - adds dummy text and NoText metadata 151 146 $self->add_dummy_text($doc_obj, $doc_obj->get_top_section()); 152 147 153 148 return 1; 154 149 155 150 } 156 151
Note: See TracChangeset for help on using the changeset viewer.