Changeset 31478 for main/trunk/greenstone2
- Timestamp:
- 2017-03-09T14:33:20+13:00 (7 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugins/EncodingUtil.pm
r31456 r31478 34 34 35 35 use PrintInfo; 36 36 use Encode; 37 37 38 38 BEGIN { … … 74 74 75 75 my $arguments = 76 [ { 'name' => "filename_encoding", 77 'desc' => "{BasePlugin.filename_encoding}", 78 'type' => "enum", 79 'deft' => "auto", 80 'list' => $encoding_plus_auto_list, 81 'reqd' => "no" } 82 ]; 76 [ { 'name' => "block_exp", 77 'desc' => "{BasePlugin.block_exp}", 78 'type' => "regexp", 79 'deft' => "", 80 'reqd' => "no" }, 81 { 'name' => "no_blocking", 82 'desc' => "{BasePlugin.no_blocking}", 83 'type' => "flag", 84 'reqd' => "no"}, 85 { 'name' => "filename_encoding", 86 'desc' => "{BasePlugin.filename_encoding}", 87 'type' => "enum", 88 'deft' => "auto", 89 'list' => $encoding_plus_auto_list, 90 'reqd' => "no" } 91 ]; 83 92 84 93 my $options = { 'name' => "EncodingUtil", … … 120 129 } 121 130 122 131 # converts raw filesystem filename to perl unicode format 132 sub raw_filename_to_unicode { 133 my $self = shift (@_); 134 my ($file) = @_; 135 136 my $unicode_file = ""; 137 ### need it in perl unicode, not raw filesystem 138 my $filename_encoding = $self->guess_filesystem_encoding(); 139 140 # copied this from set_Source_metadata in BasePlugin 141 if ((defined $filename_encoding) && ($filename_encoding ne "ascii")) { 142 # Use filename_encoding to map raw filename to a Perl unicode-aware string 143 $unicode_file = decode($filename_encoding,$file); 144 } 145 else { 146 # otherwise generate %xx encoded version of filename for char > 127 147 $unicode_file = &unicode::raw_filename_to_url_encoded($file); 148 } 149 return $unicode_file; 150 151 } 123 152 # just converts path as is to utf8. 124 153 sub filepath_to_utf8 { … … 544 573 } 545 574 575 sub block_raw_filename { 576 577 my $self = shift (@_); 578 my ($block_hash,$filename_full_path) = @_; 579 580 my $unicode_filename = $self->raw_filename_to_unicode($filename_full_path); 581 return $self->block_filename($block_hash, $unicode_filename); 582 } 583 584 # block unicode string filename 585 sub block_filename 586 { 587 my $self = shift (@_); 588 my ($block_hash,$filename_full_path) = @_; 589 print STDERR "in block filename $filename_full_path\n"; 590 print STDERR &unicode::debug_unicode_string($filename_full_path)."\n"; 591 592 if (($ENV{'GSDLOS'} =~ m/^windows$/) && ($^O ne "cygwin")) { 593 # block hash contains long names, lets make sure that we were passed a long name 594 $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path); 595 # lower case the entire thing, eg for cover.jpg when its actually cover.JPG 596 my $lower_filename_full_path = lc($filename_full_path); 597 $block_hash->{'file_blocks'}->{$lower_filename_full_path} = 1; 598 599 } 600 elsif ($ENV{'GSDLOS'} =~ m/^darwin$/) { 601 # we need to normalize the filenames 602 my $composed_filename_full_path = normalize('C', $filename_full_path); 603 print STDERR "darwin, composed filename =". &unicode::debug_unicode_string($composed_filename_full_path)."\n"; 604 $block_hash->{'file_blocks'}->{$composed_filename_full_path} = 1; 605 } 606 607 else { 608 $block_hash->{'file_blocks'}->{$filename_full_path} = 1; 609 } 610 } 611 612 613 # filename is raw filesystem name 614 sub raw_file_is_blocked { 615 my $self = shift (@_); 616 my ($block_hash, $filename_full_path) = @_; 617 618 my $unicode_filename_full_path = $self->raw_filename_to_unicode($filename_full_path); 619 return $self->file_is_blocked($block_hash, $unicode_filename_full_path); 620 } 621 622 # filename must be perl unicode string 623 sub file_is_blocked { 624 my $self = shift (@_); 625 my ($block_hash, $filename_full_path) = @_; 626 627 # 628 print STDERR "in file is blocked $filename_full_path\n"; 629 print STDERR &unicode::debug_unicode_string($filename_full_path)."\n"; 630 if (($ENV{'GSDLOS'} =~ m/^windows$/) && ($^O ne "cygwin")) { 631 # convert to long filenames if needed 632 $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path); 633 # all block paths are lowercased. 634 my $lower_filename = lc ($filename_full_path); 635 if (defined $block_hash->{'file_blocks'}->{$lower_filename}) { 636 $self->{'num_blocked'} ++; 637 return 1; 638 } 639 } 640 else { 641 if (defined $block_hash->{'file_blocks'}->{$filename_full_path}) { 642 $self->{'num_blocked'} ++; 643 print STDERR "BLOCKED\n"; 644 return 1; 645 } 646 } 647 # check Directory plugin's own block_exp 648 if ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) { 649 $self->{'num_blocked'} ++; 650 return 1; # blocked 651 } 652 print STDERR "NOT BLOCKED\n"; 653 return 0; 654 } 655 656 546 657 1; 547 658
Note:
See TracChangeset
for help on using the changeset viewer.