Changeset 23347 for main/trunk/greenstone2/perllib/plugins/BasePlugin.pm
- Timestamp: 2010-11-26T09:43:59+13:00 (13 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone2/perllib/plugins/BasePlugin.pm
r23335 r23347 530 530 # check if the filename is already in UTF8. If it is, then we're done. 531 531 if($filename_encoding =~ m/auto/) { 532 if(&unicode::check_is_utf8($filemeta))533 {534 535 536 }532 if(&unicode::check_is_utf8($filemeta)) 533 { 534 $filename_encoding = "utf8"; 535 return $filemeta; 536 } 537 537 } 538 538 … … 540 540 if ($filename_encoding eq "auto") 541 541 { 542 # try textcat543 $filename_encoding = $self->textcat_encoding($filemeta);542 # try textcat 543 $filename_encoding = $self->textcat_encoding($filemeta); 544 544 545 # check the locale next546 $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined";545 # check the locale next 546 $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined"; 547 547 548 549 # now try the encoding of the document, if available550 if ($filename_encoding eq "undefined" && defined $file_encoding) {551 552 }548 549 # now try the encoding of the document, if available 550 if ($filename_encoding eq "undefined" && defined $file_encoding) { 551 $filename_encoding = $file_encoding; 552 } 553 553 554 554 } … … 633 633 634 634 my $outhandle = $self->{'outhandle'}; 635 636 print $outhandle "****!!!!**** BasePlugin::filename_to_utf8_metadata now deprecated\n"; 637 my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0); 638 print $outhandle "Calling method: $cfilename:$cline $cpackage->$csubr\n"; 639 635 640 636 641 my ($filemeta) = $file =~ /([^\\\/]+)$/; # getting the tail of the filepath (skips all string parts containing slashes upto the end) … … 791 796 { 792 797 my $self = shift (@_); 793 my ($file,$metadata ) = @_;798 my ($file,$metadata,$plugin_filename_encoding) = @_; 794 799 795 800 my $gs_filename_encoding = $metadata->{"gs.filename_encoding"}; … … 798 803 # Start by looking for manually assigned metadata 799 804 if (defined $gs_filename_encoding) { 800 if (ref ($gs_filename_encoding) eq "ARRAY") { 801 my $outhandle = $self->{'outhandle'}; 802 
803 $deduced_filename_encoding = $gs_filename_encoding->[0]; 804 805 my $num_vals = scalar(@$gs_filename_encoding); 806 if ($num_vals>1) { 807 print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n"; 808 print $outhandle " Selecting first value: $deduced_filename_encoding\n"; 809 } 810 } 811 else { 812 $deduced_filename_encoding = $gs_filename_encoding; 813 } 814 } 815 816 # binmode(STDERR,":utf8"); 817 818 # print STDERR "**** file = $file\n"; 819 # print STDERR "**** debug file = ", &unicode::debug_unicode_string($file),"\n";; 820 821 # print STDERR "******* dfe = $deduced_filename_encoding\n"; 822 805 if (ref ($gs_filename_encoding) eq "ARRAY") { 806 my $outhandle = $self->{'outhandle'}; 807 808 $deduced_filename_encoding = $gs_filename_encoding->[0]; 809 810 my $num_vals = scalar(@$gs_filename_encoding); 811 if ($num_vals>1) { 812 print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n"; 813 print $outhandle " Selecting first value: $deduced_filename_encoding\n"; 814 } 815 } 816 else { 817 $deduced_filename_encoding = $gs_filename_encoding; 818 } 819 } 820 823 821 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 824 # Look to see if plugin specifies this value 822 # Look to see if plugin specifies this value 823 824 if (defined $plugin_filename_encoding) { 825 # First look to see if we're using any of the "older" (i.e. deprecated auto-... plugin options) 826 if ($plugin_filename_encoding =~ m/^auto-.*$/) { 827 my $outhandle = $self->{'outhandle'}; 828 print $outhandle "Warning: $plugin_filename_encoding is no longer supported\n"; 829 print $outhandle " default to 'auto'\n"; 830 $self->{'filename_encoding'} = $plugin_filename_encoding = "auto"; 831 } 832 833 if ($plugin_filename_encoding ne "auto") { 834 # We've been given a specific filenamne encoding 835 # => so use it! 
836 $deduced_filename_encoding = $plugin_filename_encoding; 837 } 838 } 825 839 } 826 840 827 841 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 828 # See if we can determine the file system encoding through locale 829 # Unix only ? 842 # See if we can determine the file system encoding through locale 843 $deduced_filename_encoding = $self->locale_encoding(); 844 845 # if locale shows us filesystem is utf8, check to see filename is consistent 846 # => if not, then we have an "alien" filename on our hands 847 848 if ($deduced_filename_encoding =~ m/^utf-?8$/i) { 849 if (!&unicode::check_is_utf8($file)) { 850 # "alien" filename, so revert 851 $deduced_filename_encoding = undef; 852 } 853 } 854 } 855 856 857 # if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 858 # # Last chance, apply textcat to deduce filename encoding 859 # $deduced_filename_encoding = $self->textcat_encoding($file); 860 # } 861 862 if ($self->{'verbosity'}>3) { 863 my $outhandle = $self->{'outhandle'}; 864 865 if (defined $deduced_filename_encoding) { 866 print $outhandle " Deduced filename encoding as: $deduced_filename_encoding\n"; 867 } 868 else { 869 print $outhandle " No filename encoding deduced\n"; 870 } 871 } 830 872 831 # if locale shows us filesystem is utf8, check to see filename is consistent832 # => if not, then we have an "alien" filename on our hands833 }834 835 836 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {837 # Last chance, apply textcat to deduce filename encoding838 }839 840 873 return $deduced_filename_encoding; 841 874 } … … 861 894 862 895 # UTF-8 version of filename 863 print STDERR "**** setting Source Metadata given: $raw_file\n"; 864 865 ## my $filemeta = $self->filename_to_utf8_metadata($raw_file, $filename_encoding); 896 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 897 print STDERR "****** Setting Source Metadata given: 
$raw_file\n"; 898 } 866 899 867 900 my $url_encoded_filename; 868 901 if (defined $filename_encoding) { 869 # => Generate a pretty print version of filename that is mapped to Unicode 870 871 # Use filename_encoding to map raw filename to a Perl unicode-aware string 872 $url_encoded_filename = decode($filename_encoding,$raw_file); 873 874 print STDERR "@@@@ pretty print using $filename_encoding: ", encode("utf8",$url_encoded_filename),"\n"; 902 # => Generate a pretty print version of filename that is mapped to Unicode 903 904 # Use filename_encoding to map raw filename to a Perl unicode-aware string 905 $url_encoded_filename = decode($filename_encoding,$raw_file); 875 906 } 876 907 else { 877 # otherwise generate %xx encoded version of filename for char > 127 878 $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file); 879 } 880 881 print STDERR "***** saving Source as: $url_encoded_filename\n"; 908 # otherwise generate %xx encoded version of filename for char > 127 909 $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file); 910 } 911 912 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 913 print STDERR "***** saving Source as: $url_encoded_filename\n"; 914 } 882 915 883 916 … … 893 926 $renamed_raw_url); 894 927 895 print STDERR "***** saving SourceFile as: $renamed_raw_url\n"; 928 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 929 print STDERR "***** saving SourceFile as: $renamed_raw_url\n"; 930 } 896 931 } 897 932 … … 953 988 954 989 955 my $filename_encoding = $self->deduce_filename_encoding($file,$metadata); 990 my $plugin_filename_encoding = $self->{'filename_encoding'}; 991 my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 956 992 $self->set_Source_metadata($doc_obj,$filename_no_path,$filename_encoding); 957 993
Note: See TracChangeset for help on using the changeset viewer.