Changeset 23335 for main/trunk/greenstone2/perllib/plugins/BasePlugin.pm
- Timestamp: 2010-11-19T13:29:29+13:00 (13 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone2/perllib/plugins/BasePlugin.pm
r23279 r23335 31 31 32 32 use File::Basename; 33 use Encode; 33 34 34 35 use encodings; … … 785 786 } 786 787 787 # is there ever only one Source? Sometimes this will be called twice, for images etc that are converted. 788 789 790 sub deduce_filename_encoding 791 { 792 my $self = shift (@_); 793 my ($file,$metadata) = @_; 794 795 my $gs_filename_encoding = $metadata->{"gs.filename_encoding"}; 796 my $deduced_filename_encoding = undef; 797 798 # Start by looking for manually assigned metadata 799 if (defined $gs_filename_encoding) { 800 if (ref ($gs_filename_encoding) eq "ARRAY") { 801 my $outhandle = $self->{'outhandle'}; 802 803 $deduced_filename_encoding = $gs_filename_encoding->[0]; 804 805 my $num_vals = scalar(@$gs_filename_encoding); 806 if ($num_vals>1) { 807 print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n"; 808 print $outhandle " Selecting first value: $deduced_filename_encoding\n"; 809 } 810 } 811 else { 812 $deduced_filename_encoding = $gs_filename_encoding; 813 } 814 } 815 816 # binmode(STDERR,":utf8"); 817 818 # print STDERR "**** file = $file\n"; 819 # print STDERR "**** debug file = ", &unicode::debug_unicode_string($file),"\n";; 820 821 # print STDERR "******* dfe = $deduced_filename_encoding\n"; 822 823 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 824 # Look to see if plugin specifies this value 825 } 826 827 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 828 # See if we can determine the file system encoding through locale 829 # Unix only ? 
830 831 # if locale shows us filesystem is utf8, check to see filename is consistent 832 # => if not, then we have an "alien" filename on our hands 833 } 834 835 836 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 837 # Last chance, apply textcat to deduce filename encoding 838 } 839 840 return $deduced_filename_encoding; 841 } 842 843 844 845 846 # Notionally written to be called once for each document, it is however safe to 847 # call multiple times (as in the case of ImagePlugin) which calls this later on 848 # after the original image has potentially been converted to a *new* source image 849 # format (e.g. TIFF to PNG) 850 788 851 sub set_Source_metadata { 789 852 my $self = shift (@_); 790 my ($doc_obj, $filename_no_path, $file_encoding) = @_; 853 my ($doc_obj, $raw_file, $filename_encoding) = @_; 854 855 # 1. Sets the filename (Source) for display encoded as Unicode if possible, 856 # and (as a fallback) using %xx if not for non-ascii chars 857 # 2. 
Sets the url ref (SourceFile) to the URL encoded version 858 # of filename for generated files 791 859 792 860 my $top_section = $doc_obj->get_top_section(); 793 861 794 862 # UTF-8 version of filename 795 my $filemeta = $self->filename_to_utf8_metadata($filename_no_path, $file_encoding); 863 print STDERR "**** setting Source Metadata given: $raw_file\n"; 864 865 ## my $filemeta = $self->filename_to_utf8_metadata($raw_file, $filename_encoding); 866 867 my $url_encoded_filename; 868 if (defined $filename_encoding) { 869 # => Generate a pretty print version of filename that is mapped to Unicode 870 871 # Use filename_encoding to map raw filename to a Perl unicode-aware string 872 $url_encoded_filename = decode($filename_encoding,$raw_file); 873 874 print STDERR "@@@@ pretty print using $filename_encoding: ", encode("utf8",$url_encoded_filename),"\n"; 875 } 876 else { 877 # otherwise generate %xx encoded version of filename for char > 127 878 $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file); 879 } 880 881 print STDERR "***** saving Source as: $url_encoded_filename\n"; 882 796 883 797 884 # Source is the UTF8 display name - not necessarily the name of the file on the system 798 $doc_obj->set_utf8_metadata_element($top_section, "Source", $ filemeta);799 800 $filemeta = &util::rename_file($filemeta, $self->{'file_rename_method'});885 $doc_obj->set_utf8_metadata_element($top_section, "Source", $url_encoded_filename); 886 887 my $renamed_raw_file = &util::rename_file($raw_file, $self->{'file_rename_method'}); 801 888 # If using URL encoding, then SourceFile is the url-reference to url-encoded 802 # filemeta: it's a url that refers to the actual file on the system 803 $filemeta = &unicode::filename_to_url($filemeta); 804 805 $doc_obj->set_utf8_metadata_element($top_section, "SourceFile", $filemeta); 889 # renamed_raw_url: it's a url that refers to the actual file on the system 890 my $renamed_raw_url = &unicode::filename_to_url($renamed_raw_file); 
891 892 $doc_obj->set_utf8_metadata_element($top_section, "SourceFile", 893 $renamed_raw_url); 894 895 print STDERR "***** saving SourceFile as: $renamed_raw_url\n"; 806 896 } 807 897 … … 863 953 864 954 865 # sets the UTF8 filename (Source) for display and sets the url ref to URL encoded version 866 # of the UTF8 filename (SourceFile) for generated files 867 $self->set_Source_metadata($doc_obj, $filename_no_path); 868 955 my $filename_encoding = $self->deduce_filename_encoding($file,$metadata); 956 $self->set_Source_metadata($doc_obj,$filename_no_path,$filename_encoding); 869 957 870 958 # plugin specific stuff - what args do we need here?? … … 1032 1120 my $file_derived_title = $file; 1033 1121 $file_derived_title =~ s/_/ /g; 1034 $file_derived_title =~ s/\.[^.] $//;1122 $file_derived_title =~ s/\.[^.]+$//; 1035 1123 1036 1124 return $file_derived_title; … … 1044 1132 1045 1133 if (!defined $doc_obj->get_metadata_element ($section, "Title") 1046 or $doc_obj->get_metadata_element($section, "Title") eq "") { 1047 1048 my $file_derived_title = $self->filename_to_utf8_metadata($self->filename_based_title($file)); 1134 || $doc_obj->get_metadata_element($section, "Title") eq "") { 1135 1136 my $source_file = $doc_obj->get_metadata_element($section, "Source"); 1137 my $file_derived_title; 1138 if (defined $source_file) { 1139 $file_derived_title = $self->filename_based_title($source_file); 1140 } 1141 else { 1142 # pp = pretty print 1143 my $pp_file = (defined $source_file) ? $source_file : $file; 1144 1145 my $raw_title = $self->filename_based_title($file); 1146 my $file_derived_title = &unicode::raw_filename_to_url_encoded($raw_title); 1147 } 1148 1149 1049 1150 if (!defined $doc_obj->get_metadata_element ($section, "Title")) { 1050 1151 $doc_obj->add_utf8_metadata ($section, "Title", $file_derived_title);
Note: See TracChangeset for help on using the changeset viewer.