Changeset 14961
- Timestamp:
- 2008-02-12T15:17:47+13:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/BasPlug.pm
r13968 r14961 797 797 798 798
# Turn the last path component of $file into UTF-8 metadata text and pass
# it through ghtml::dmsafe().
#
# Parameters:
#   $file     - path of the file; only the part after the final '/' or '\'
#               is used.
#   $encoding - encoding of the file's contents (may be undef).  Used as a
#               fallback when no usable encoding is found in the locale.
#
# Returns whatever ghtml::dmsafe() returns for the (possibly converted) name.
#
# The filename's encoding is guessed from the codeset part of the current
# LC_CTYPE locale.  A codeset is accepted when it matches ascii/utf8/unicode
# or is a key of $encodings::encoding; a purely numeric codeset is first
# tried as "windows_<n>" and then "dos_<n>".
sub filename_to_metadata
{
    my $self = shift (@_);
    my ($file, $encoding) = @_;

    my $outhandle = $self->{'outhandle'};

    my $filesystem_encoding = undef;

    eval {
        # Load POSIX at run time.  A compile-time 'use' placed inside this
        # block would execute before the eval is entered, so a failure to
        # load could never be trapped here.
        require POSIX;

        # With only one parameter, setlocale retrieves the current value
        my $current_locale = POSIX::setlocale(POSIX::LC_CTYPE());

        # setlocale() can return undef, and locales without a '.' carry no
        # codeset; in both cases $filesystem_encoding is left undefined.
        if (defined $current_locale && $current_locale =~ m/^.*\.(.*?)$/) {
            my $char_encoding = lc($1);

            # Drop a trailing "@modifier" (e.g. "iso-8859-15@euro") so only
            # the codeset itself is looked up.
            $char_encoding =~ s/\@.*$//;

            $char_encoding =~ s/-/_/g;
            $char_encoding =~ s/^utf_8$/utf8/;

            # A bare number is treated as a code page number.
            if ($char_encoding =~ m/^\d+$/) {
                if (defined $encodings::encoding->{"windows_$char_encoding"}) {
                    $char_encoding = "windows_$char_encoding";
                }
                elsif (defined $encodings::encoding->{"dos_$char_encoding"}) {
                    $char_encoding = "dos_$char_encoding";
                }
            }

            if (($char_encoding =~ m/(?:ascii|utf8|unicode)/)
                || (defined $encodings::encoding->{$char_encoding})) {
                $filesystem_encoding = $char_encoding;
            }
            else {
                print $outhandle "Warning: Unsupported character encoding '$char_encoding' from locale '$current_locale'\n";
            }
        }
    };
    if ($@) {
        # NOTE(review): the message says UTF-8 will be assumed, but
        # $filesystem_encoding stays undefined here, so the code below
        # falls back to $encoding when that is defined.
        print $outhandle "$@\n";
        print $outhandle "Warning: Unable to establish locale.  Will assume filesytem is UTF-8\n";
    }

    # Keep only the part after the last path separator.
    my ($filemeta) = $file =~ /([^\\\/]+)$/;

    # how do we know what encoding the filename is in?
    # => one answer is to check the locale

    if (defined $filesystem_encoding) {
        # ascii/utf8/unicode names are passed through without conversion
        if ($filesystem_encoding !~ /(?:ascii|utf8|unicode)/) {
            $filemeta = unicode::unicode2utf8(
                unicode::convert2unicode($filesystem_encoding, \$filemeta)
            );
        }
    }
    # assume it is in the same encoding as its contents
    elsif ((defined $encoding) && ($encoding !~ /(?:ascii|utf8|unicode)/)) {
        $filemeta = unicode::unicode2utf8(
            unicode::convert2unicode($encoding, \$filemeta)
        );
    }

    my $dmsafe_filemeta = &ghtml::dmsafe($filemeta);

    return $dmsafe_filemeta;
}
868 799 869 800 870 # The BasPlug read_into_doc_obj() function. This function does all the … … 835 905 # create a new document 836 906 my $doc_obj = new doc ($filename, "indexed_doc"); 907 my $top_section = $doc_obj->get_top_section(); 908 837 909 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'}); 838 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Language", $language); 839 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding); 840 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 841 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename)); 842 843 my ($filemeta) = $file =~ /([^\\\/]+)$/; 844 # how do we know what encoding the filename is in? 
845 # assume it is in the same encoding as its contents 846 if ($encoding !~ /(?:ascii|utf8|unicode)/) { 847 $filemeta = unicode::unicode2utf8( 848 unicode::convert2unicode($encoding, \$filemeta) 849 ); 850 } 851 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta)); 910 $doc_obj->add_utf8_metadata($top_section, "Language", $language); 911 $doc_obj->add_utf8_metadata($top_section, "Encoding", $encoding); 912 $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}"); 913 $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename)); 914 915 my $filemeta = $self->filename_to_metadata($file,$encoding); 916 $doc_obj->add_utf8_metadata($top_section, "Source", $filemeta); 852 917 if ($self->{'cover_image'}) { 853 918 $self->associate_cover_image($doc_obj, $filename);
Note:
See TracChangeset
for help on using the changeset viewer.