Changeset 23335
- Timestamp:
- 2010-11-19T13:29:29+13:00 (13 years ago)
- Location:
- main/trunk/greenstone2/perllib/plugins
- Files:
-
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugins/BasePlugin.pm
r23279 r23335 31 31 32 32 use File::Basename; 33 use Encode; 33 34 34 35 use encodings; … … 785 786 } 786 787 787 # is there ever only one Source? Sometimes this will be called twice, for images etc that are converted. 788 789 790 sub deduce_filename_encoding 791 { 792 my $self = shift (@_); 793 my ($file,$metadata) = @_; 794 795 my $gs_filename_encoding = $metadata->{"gs.filename_encoding"}; 796 my $deduced_filename_encoding = undef; 797 798 # Start by looking for manually assigned metadata 799 if (defined $gs_filename_encoding) { 800 if (ref ($gs_filename_encoding) eq "ARRAY") { 801 my $outhandle = $self->{'outhandle'}; 802 803 $deduced_filename_encoding = $gs_filename_encoding->[0]; 804 805 my $num_vals = scalar(@$gs_filename_encoding); 806 if ($num_vals>1) { 807 print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n"; 808 print $outhandle " Selecting first value: $deduced_filename_encoding\n"; 809 } 810 } 811 else { 812 $deduced_filename_encoding = $gs_filename_encoding; 813 } 814 } 815 816 # binmode(STDERR,":utf8"); 817 818 # print STDERR "**** file = $file\n"; 819 # print STDERR "**** debug file = ", &unicode::debug_unicode_string($file),"\n";; 820 821 # print STDERR "******* dfe = $deduced_filename_encoding\n"; 822 823 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 824 # Look to see if plugin specifies this value 825 } 826 827 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 828 # See if we can determine the file system encoding through locale 829 # Unix only ? 830 831 # if locale shows us filesystem is utf8, check to see filename is consistent 832 # => if not, then we have an "alien" filename on our hands 833 } 834 835 836 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 837 # Last chance, apply textcat to deduce filename encoding 838 } 839 840 return $deduced_filename_encoding; 841 } 842 843 844 845 846 # Notionally written to be called once for each document, it is however safe to 847 # call multiple times (as in the case of ImagePlugin) which calls this later on 848 # after the original image has potentially been converted to a *new* source image 849 # format (e.g. TIFF to PNG) 850 788 851 sub set_Source_metadata { 789 852 my $self = shift (@_); 790 my ($doc_obj, $filename_no_path, $file_encoding) = @_; 853 my ($doc_obj, $raw_file, $filename_encoding) = @_; 854 855 # 1. Sets the filename (Source) for display encoded as Unicode if possible, 856 # and (as a fallback) using %xx if not for non-ascii chars 857 # 2. Sets the url ref (SourceFile) to the URL encoded version 858 # of filename for generated files 791 859 792 860 my $top_section = $doc_obj->get_top_section(); 793 861 794 862 # UTF-8 version of filename 795 my $filemeta = $self->filename_to_utf8_metadata($filename_no_path, $file_encoding); 863 print STDERR "**** setting Source Metadata given: $raw_file\n"; 864 865 ## my $filemeta = $self->filename_to_utf8_metadata($raw_file, $filename_encoding); 866 867 my $url_encoded_filename; 868 if (defined $filename_encoding) { 869 # => Generate a pretty print version of filename that is mapped to Unicode 870 871 # Use filename_encoding to map raw filename to a Perl unicode-aware string 872 $url_encoded_filename = decode($filename_encoding,$raw_file); 873 874 print STDERR "@@@@ pretty print using $filename_encoding: ", encode("utf8",$url_encoded_filename),"\n"; 875 } 876 else { 877 # otherwise generate %xx encoded version of filename for char > 127 878 $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file); 879 } 880 881 print STDERR "***** saving Source as: $url_encoded_filename\n"; 882 796 883 797 884 # Source is the UTF8 display name - not necessarily the name of the file on the system 798 $doc_obj->set_utf8_metadata_element($top_section, "Source", $ filemeta);799 800 $filemeta = &util::rename_file($filemeta, $self->{'file_rename_method'});885 $doc_obj->set_utf8_metadata_element($top_section, "Source", $url_encoded_filename); 886 887 my $renamed_raw_file = &util::rename_file($raw_file, $self->{'file_rename_method'}); 801 888 # If using URL encoding, then SourceFile is the url-reference to url-encoded 802 # filemeta: it's a url that refers to the actual file on the system 803 $filemeta = &unicode::filename_to_url($filemeta); 804 805 $doc_obj->set_utf8_metadata_element($top_section, "SourceFile", $filemeta); 889 # renamed_raw_url: it's a url that refers to the actual file on the system 890 my $renamed_raw_url = &unicode::filename_to_url($renamed_raw_file); 891 892 $doc_obj->set_utf8_metadata_element($top_section, "SourceFile", 893 $renamed_raw_url); 894 895 print STDERR "***** saving SourceFile as: $renamed_raw_url\n"; 806 896 } 807 897 … … 863 953 864 954 865 # sets the UTF8 filename (Source) for display and sets the url ref to URL encoded version 866 # of the UTF8 filename (SourceFile) for generated files 867 $self->set_Source_metadata($doc_obj, $filename_no_path); 868 955 my $filename_encoding = $self->deduce_filename_encoding($file,$metadata); 956 $self->set_Source_metadata($doc_obj,$filename_no_path,$filename_encoding); 869 957 870 958 # plugin specific stuff - what args do we need here?? … … 1032 1120 my $file_derived_title = $file; 1033 1121 $file_derived_title =~ s/_/ /g; 1034 $file_derived_title =~ s/\.[^.] $//;1122 $file_derived_title =~ s/\.[^.]+$//; 1035 1123 1036 1124 return $file_derived_title; … … 1044 1132 1045 1133 if (!defined $doc_obj->get_metadata_element ($section, "Title") 1046 or $doc_obj->get_metadata_element($section, "Title") eq "") { 1047 1048 my $file_derived_title = $self->filename_to_utf8_metadata($self->filename_based_title($file)); 1134 || $doc_obj->get_metadata_element($section, "Title") eq "") { 1135 1136 my $source_file = $doc_obj->get_metadata_element($section, "Source"); 1137 my $file_derived_title; 1138 if (defined $source_file) { 1139 $file_derived_title = $self->filename_based_title($source_file); 1140 } 1141 else { 1142 # pp = pretty print 1143 my $pp_file = (defined $source_file) ? $source_file : $file; 1144 1145 my $raw_title = $self->filename_based_title($file); 1146 my $file_derived_title = &unicode::raw_filename_to_url_encoded($raw_title); 1147 } 1148 1149 1049 1150 if (!defined $doc_obj->get_metadata_element ($section, "Title")) { 1050 1151 $doc_obj->add_utf8_metadata ($section, "Title", $file_derived_title); -
main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm
r23277 r23335 312 312 313 313 for (my $i = 0; $i < scalar(@dir); $i++) { 314 my $subfile = $dir[$i]; 314 my $raw_subfile = $dir[$i]; 315 next if ($raw_subfile =~ m/^\.\.?$/); 316 315 317 my $this_file_base_dir = $base_dir; 316 next if ($subfile =~ m/^\.\.?$/);317 318 # Recursively read each $ subfile319 print $outhandle "DirectoryPlugin block recurring: $ subfile\n" if ($verbosity > 2);318 my $raw_file_subfile = &util::filename_cat($file, $raw_subfile); 319 320 # Recursively read each $raw_subfile 321 print $outhandle "DirectoryPlugin block recurring: $raw_file_subfile\n" if ($verbosity > 2); 320 322 321 323 #$count += &plugin::file_block_read ($pluginfo, $this_file_base_dir, 324 322 325 &plugin::file_block_read ($pluginfo, $this_file_base_dir, 323 &util::filename_cat($file, $subfile),326 $raw_file_subfile, 324 327 $block_hash, $metadata, $gli); 325 328 … … 390 393 } 391 394 @dir = readdir (DIR); 395 map { $_ = &unicode::raw_filename_to_url_encoded($_) } @dir; 392 396 closedir (DIR); 393 397 … … 436 440 for (my $i = 0; $i < scalar(@dir); $i++) { 437 441 my $subfile = $dir[$i]; 442 next if ($subfile =~ m/^\.\.?$/); 443 438 444 my $this_file_base_dir = $base_dir; 439 next if ($subfile =~ m/^\.\.?$/); 440 my $file_subfile = &util::filename_cat($file, $subfile); 441 my $full_filename = &util::filename_cat($this_file_base_dir, $file_subfile); 442 if ($self->file_is_blocked($block_hash,$full_filename)) { 443 print STDERR "DirectoryPlugin: file $full_filename was blocked for metadata_read\n" if ($verbosity > 2); 445 my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile); 446 447 my $raw_file_subfile = &util::filename_cat($file, $raw_subfile); 448 my $raw_full_filename = &util::filename_cat($this_file_base_dir, $raw_file_subfile); 449 450 if ($self->file_is_blocked($block_hash,$raw_full_filename)) { 451 print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for metadata_read\n" if ($verbosity > 2); 444 452 next; 445 453 } 446 454 447 # Recursively read each $ subfile448 print $outhandle "DirectoryPlugin metadata recurring: $ subfile\n" if ($verbosity > 2);455 # Recursively read each $raw_subfile 456 print $outhandle "DirectoryPlugin metadata recurring: $raw_subfile\n" if ($verbosity > 2); 449 457 450 458 &plugin::metadata_read ($pluginfo, $this_file_base_dir, 451 $ file_subfile,$block_hash,459 $raw_file_subfile,$block_hash, 452 460 \@extrametakeys, \%extrametadata, 453 461 \%extrametafile, … … 497 505 last if (!opendir (DIR, $dirname)); 498 506 my @dirnow = readdir (DIR); 507 map { $_ = &unicode::raw_filename_to_url_encoded($_) } @dirnow; 499 508 closedir (DIR); 500 509 … … 518 527 519 528 my $subfile = $dir[$i]; 520 my $this_file_base_dir = $base_dir;521 529 last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs); 522 530 next if ($subfile =~ /^\.\.?$/); 523 531 524 my $file_subfile = &util::filename_cat($file, $subfile); 525 my $full_filename 526 = &util::filename_cat($this_file_base_dir,$file_subfile); 527 528 if ($self->file_is_blocked($block_hash,$full_filename)) { 529 print STDERR "DirectoryPlugin: file $full_filename was blocked for read\n" if ($verbosity > 2); 532 my $this_file_base_dir = $base_dir; 533 my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile); 534 535 my $raw_file_subfile = &util::filename_cat($file, $raw_subfile); 536 my $raw_full_filename 537 = &util::filename_cat($this_file_base_dir,$raw_file_subfile); 538 539 if ($self->file_is_blocked($block_hash,$raw_full_filename)) { 540 print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for read\n" if ($verbosity > 2); 530 541 next; 531 542 } 532 #print STDERR "processing $ full_filename\n";543 #print STDERR "processing $raw_full_filename\n"; 533 544 # Follow Windows shortcuts 534 if ($ subfile =~ /(?i)\.lnk$/ && $ENV{'GSDLOS'} =~ /^windows$/i) {545 if ($raw_subfile =~ /(?i)\.lnk$/ && $ENV{'GSDLOS'} =~ /^windows$/i) { 535 546 require Win32::Shortcut; 536 my $shortcut = new Win32::Shortcut(&util::filename_cat($dirname, $ subfile));547 my $shortcut = new Win32::Shortcut(&util::filename_cat($dirname, $raw_subfile)); 537 548 if ($shortcut) { 538 549 # The file to be processed is now the target of the shortcut 539 550 $this_file_base_dir = ""; 540 551 $file = ""; 541 $ subfile = $shortcut->Path;552 $raw_subfile = $shortcut->Path; 542 553 } 543 554 } 544 555 545 556 # check for a symlink pointing back to a leading directory 546 if (-d "$dirname/$ subfile" && -l "$dirname/$subfile") {557 if (-d "$dirname/$raw_subfile" && -l "$dirname/$raw_subfile") { 547 558 # readlink gives a "fatal error" on systems that don't implement 548 559 # symlinks. This assumes the the -l test above would fail on those. 549 my $linkdest=readlink "$dirname/$ subfile";560 my $linkdest=readlink "$dirname/$raw_subfile"; 550 561 if (!defined ($linkdest)) { 551 562 # system error - file not found? … … 555 566 if ($linkdest =~ m@^[\./\\]+$@ || 556 567 index($dirname, $linkdest) != -1) { 557 warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$ subfile -> $linkdest)\n";568 warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$raw_subfile -> $linkdest)\n"; 558 569 next; 559 570 ; … … 562 573 } 563 574 564 print $outhandle "DirectoryPlugin: preparing metadata for $ subfile\n" if ($verbosity > 2);565 566 # Make a copy of $in_metadata to pass to $ subfile575 print $outhandle "DirectoryPlugin: preparing metadata for $raw_subfile\n" if ($verbosity > 2); 576 577 # Make a copy of $in_metadata to pass to $raw_subfile 567 578 my $out_metadata = {}; 568 579 &metadatautil::combine_metadata_structures($out_metadata, $in_metadata); 569 580 570 581 # check the assocfile_info 571 if (defined $self->{'assocfile_info'}->{$ full_filename}) {572 &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$ full_filename});573 } 574 ## encode the filename as perl5 doesn't handle unicode filenames 575 576 my $tmpfile = Encode::encode_utf8($subfile); 582 if (defined $self->{'assocfile_info'}->{$raw_full_filename}) { 583 &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$raw_full_filename}); 584 } 585 586 # $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8 587 577 588 # Next add metadata read in XML files (if it is supplied) 578 589 if ($additionalmetadata == 1) { 579 590 foreach my $filespec (@extrametakeys) { 580 ## use the utf8 encoded filename to do the filename comparison 581 if ($tmpfile =~ /^$filespec$/) { 591 ## use the url-encoded filename to do the filename comparison 592 593 if ($subfile =~ /^$filespec$/) { 582 594 print $outhandle "File \"$subfile\" matches filespec \"$filespec\"\n" 583 595 if ($verbosity > 2); … … 605 617 # Look to see if it's a completely new file 606 618 607 if (!$block_hash->{'new_files'}->{$ full_filename}) {619 if (!$block_hash->{'new_files'}->{$raw_full_filename}) { 608 620 # Not a new file, must be an existing file 609 621 # Let' see if it's newer than the last import.pl 610 622 611 623 612 if (! -d $ full_filename) {613 if (!$block_hash->{'reindex_files'}->{$ full_filename}) {624 if (! -d $raw_full_filename) { 625 if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) { 614 626 # filename has been around for longer than inf_timestamp 615 627 print $outhandle "**** Skipping $subfile\n" if ($verbosity >3); … … 634 646 635 647 $count += &plugin::read ($pluginfo, $this_file_base_dir, 636 $ file_subfile, $block_hash,648 $raw_file_subfile, $block_hash, 637 649 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli); 638 650 } -
main/trunk/greenstone2/perllib/plugins/HTMLPlugin.pm
r22951 r23335 181 181 my @file_blocks; 182 182 183 my ($language, $encoding) = $self->textcat_get_language_encoding ($filename_full_path); 183 my ($language, $content_encoding) = $self->textcat_get_language_encoding ($filename_full_path); 184 $self->{'store_content_encoding'}->{$filename_full_path} = $content_encoding; 184 185 185 186 # read in file ($text will be in utf8) … … 229 230 # Convert the url_original_filename into its utf8 version. Store the utf8 link along with the url_original_filename 230 231 my $utf8_link = ""; 231 $self->decode_text($link,$ encoding,$language,\$utf8_link);232 $self->decode_text($link,$content_encoding,$language,\$utf8_link); 232 233 233 234 $self->{'utf8_to_original_filename'}->{$utf8_link} = $url_original_filename; … … 272 273 273 274 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 275 276 # Lookup content_encoding worked out in file_block pass for this file 277 # Store it under the local name 'content_encoding' so its nice and 278 # easy to access 279 $self->{'content_encoding'} = $self->{'store_content_encoding'}->{$filename_full_path}; 280 274 281 # get the input file 275 282 my $input_filename = $file; … … 311 318 $self->set_Source_metadata($doc_obj, $filename_no_path); 312 319 } 320 321 delete $self->{'store_content_encoding'}->{$filename_full_path}; 322 $self->{'content_encoding'} = undef; 323 313 324 return ($process_status,$doc_obj); 314 325 } … … 367 378 # links, so even if 'file_is_url' is off, still need to store info 368 379 369 my ($tailname,$dirname,$suffix) = &File::Basename::fileparse($file, "\\.[^\\.]+\$"); 370 my $utf8_file = $self->filename_to_utf8_metadata($file); 371 $utf8_file =~ s/&\#095;/_/g; 380 my ($tailname,$dirname) = &File::Basename::fileparse($file); 381 print STDERR "***!! file = $file\n"; 382 # my $utf8_file = $self->filename_to_utf8_metadata($file); 383 # $utf8_file =~ s/&\#095;/_/g; 384 my $utf8_file = &unicode::raw_filename_to_url_encoded($tailname); 385 print STDERR "***!! utf8_file = $utf8_file\n"; 386 372 387 my $web_url = "http://"; 373 388 if(defined $dirname) { # local directory … … 535 550 $self->process_section($textref, $base_dir, $file, $doc_obj, $cursection); 536 551 } 552 537 553 return 1; 538 554 } … … 737 753 return $front . $link . $back if $href =~ m/^(mailto|news|gopher|nntp|telnet|javascript):/is; 738 754 739 740 755 if (($rl == 0) || ($filename =~ m/$self->{'process_exp'}/) || 741 756 ($href =~ m/\/$/) || ($href =~ m/^(mailto|news|gopher|nntp|telnet|javascript):/i)) { 757 758 759 # If web page didn't give encoding, then default to utf8 760 print "*************** looking up $file\n"; 761 762 my $content_encoding= $self->{'content_encoding'} || "utf8"; 763 $href = encode($content_encoding,$href); 764 765 $href = &unicode::raw_filename_to_url_encoded($href); 766 $href = &unicode::filename_to_url($href); 767 742 768 &ghtml::urlsafe ($href); 769 print STDERR "***!!! href=$href\n"; 770 743 771 return $front . "_httpextlink_&rl=" . $rl . "&href=" . $href . $hash_part . $back; 744 772 } else { 745 # link is to some other type of file (e gimage) so we'll773 # link is to some other type of file (e.g., an image) so we'll 746 774 # need to associate that file 747 775 return $front . $self->add_file ($href, $rl, $hash_part, $base_dir, $doc_obj, $section) . $back; … … 769 797 $filename =~ s/([\\\/])tidytmp([\\\/])/$1import$2/; 770 798 } 771 # Replace %XX's in URL with decoded value if required. Note that the filename may include the %XX in some 772 # situations. If the *original* file's name was in URL encoding, the following method will not decode it. 799 800 # Replace %XX's in URL with decoded value if required. Note that the 801 # filename may include the %XX in some situations. If the *original* 802 # file's name was in URL encoding, the following method will not decode 803 # it. 773 804 my $utf8_filename = $filename; 774 $filename = $self->opt_url_decode($utf8_filename); 805 my $opt_decode_utf8_filename = $self->opt_url_decode($utf8_filename); 806 807 my $content_encoding= $self->{'content_encoding'} || "utf8"; 808 809 # The filenames that come through the HTML file have been decoded 810 # into Unicode aware Perl strings. Need to convert them back 811 # to their initial raw-byte encoding to match the file that 812 # exists on the file system 813 $filename = encode($content_encoding, $opt_decode_utf8_filename); 814 775 815 776 816 # some special processing if the intended filename was converted to utf8, but … … 778 818 if (!-e $filename) { 779 819 # try the original filename stored in map 780 my $original_filename = $self->{'utf8_to_original_filename'}->{$filename}; 820 print STDERR "***###!! orig filename did not exist: $filename\n"; 821 822 my $original_filename = $self->{'utf8_to_original_filename'}->{$utf8_filename}; 823 824 print STDERR "**** Trying for $original_filename\n"; 825 781 826 if (defined $original_filename && -e $original_filename) { 827 print STDERR "*** found match\n"; 782 828 $filename = $original_filename; 783 829 } … … 1118 1164 $title =~ s/^$self->{'title_sub'}// if ($self->{'title_sub'}); 1119 1165 $title =~ s/^\s+//s; # in case title_sub introduced any... 1120 $doc_obj->add_utf8_metadata ($section, 'Title', $title); 1166 print STDERR "**** adding Title: ", Encode::encode("utf8",$title), "\n"; 1167 $doc_obj->add_utf8_metadata ($section, "Title", $title); 1121 1168 print $outhandle " extracted Title metadata \"$title\" from $from\n" 1122 1169 if ($self->{'verbosity'} > 2); -
main/trunk/greenstone2/perllib/plugins/ImageConverter.pm
r22663 r23335 152 152 sub generate_images { 153 153 my $self = shift(@_); 154 my ($filename_full_path, $filename_no_path, $doc_obj, $section ) = @_;154 my ($filename_full_path, $filename_no_path, $doc_obj, $section, $filename_encoding) = @_; 155 155 156 156 # check image magick status … … 196 196 197 197 if ($converttotype ne "" && $filename_full_path !~ m/$converttotype$/) { 198 # $doc_obj->add_utf8_metadata($section, "Image", $utf8_filename_meta);198 # # $doc_obj->add_utf8_metadata($section, "Image", $utf8_filename_meta); 199 199 200 200 my ($result, $converted_filename_full_path) … … 214 214 215 215 # here we overwrite the original with the potentially converted one 216 $doc_obj->set_utf8_metadata_element($section, "Source", &unicode::url_decode($filename_no_path)); # displayname of generated image 217 $doc_obj->set_utf8_metadata_element($section, "SourceFile", $url_to_filename_no_path); # displayname of generated image 216 # $doc_obj->set_utf8_metadata_element($section, "Source", &unicode::url_decode($filename_no_path)); # displayname of generated image 217 # $doc_obj->set_utf8_metadata_element($section, "SourceFile", $url_to_filename_no_path); # displayname of generated image 218 219 # $self->set_Source_metadata($doc_obj,$url_to_filename_no_path,undef); 220 221 $self->set_Source_metadata($doc_obj,&unicode::url_decode($filename_no_path), 222 $filename_encoding); 223 218 224 219 225 # use identify to get info about the (possibly converted) image -
main/trunk/greenstone2/perllib/plugins/ImagePlugin.pm
r22663 r23335 110 110 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 111 111 112 113 112 my $outhandle = $self->{'outhandle'}; 114 113 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); … … 116 115 if ($self->{'image_conversion_available'} == 1) 117 116 { 118 my $utf8_filename_no_path = $self->filepath_to_utf8($filename_no_path); 119 my $url_encoded_filename = &util::rename_file($utf8_filename_no_path, $self->{'file_rename_method'}); 120 $self->generate_images($filename_full_path, $url_encoded_filename, $doc_obj, $doc_obj->get_top_section()); # should we check the return value? 117 my $filename_encoding = $self->deduce_filename_encoding($file,$metadata); 118 119 # my $utf8_filename_no_path = $self->filepath_to_utf8($filename_no_path); 120 # my $url_encoded_filename = &util::rename_file($utf8_filename_no_path, $self->{'file_rename_method'}); 121 122 # $self->generate_images($filename_full_path, $url_encoded_filename, 123 # $doc_obj, $doc_obj->get_top_section()); # should we check the return value? 124 125 $filename_no_path = &unicode::raw_filename_to_url_encoded($filename_no_path); 126 127 # should we check the return value? 128 $self->generate_images($filename_full_path, $filename_no_path, 129 $doc_obj, $doc_obj->get_top_section(),$filename_encoding); 130 121 131 } 122 132 else -
main/trunk/greenstone2/perllib/plugins/ReadTextFile.pm
r22857 r23335 128 128 129 129 # Do encoding stuff 130 my ($language, $ encoding) = $self->textcat_get_language_encoding ($filename_full_path);130 my ($language, $content_encoding) = $self->textcat_get_language_encoding ($filename_full_path); 131 131 if ($self->{'verbosity'} > 2) { 132 print $outhandle "ReadTextFile: reading $file as ($ encoding,$language)\n";132 print $outhandle "ReadTextFile: reading $file as ($content_encoding,$language)\n"; 133 133 } 134 134 … … 140 140 $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}"); 141 141 $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename_full_path)); 142 $self->set_Source_metadata($doc_obj, $filename_no_path, $encoding); 142 143 my $filename_encoding = $self->deduce_filename_encoding($file,$metadata); 144 $self->set_Source_metadata($doc_obj, $filename_no_path, $filename_encoding); 143 145 144 146 $doc_obj->add_utf8_metadata($top_section, "Language", $language); 145 $doc_obj->add_utf8_metadata($top_section, "Encoding", $ encoding);147 $doc_obj->add_utf8_metadata($top_section, "Encoding", $content_encoding); 146 148 147 149 # read in file ($text will be in utf8) 148 150 my $text = ""; 149 $self->read_file ($filename_full_path, $ encoding, $language, \$text);151 $self->read_file ($filename_full_path, $content_encoding, $language, \$text); 150 152 151 153 if (!length ($text)) {
Note:
See TracChangeset
for help on using the changeset viewer.