Changeset 31478 for main/trunk


Ignore:
Timestamp:
2017-03-09T14:33:20+13:00 (7 years ago)
Author:
kjdon
Message:

blocking stuff moved to here

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/EncodingUtil.pm

    r31456 r31478  
    3434
    3535use PrintInfo;
    36 
     36use Encode;
    3737
    3838BEGIN {
     
    7474
    7575my $arguments =
    76      [ { 'name' => "filename_encoding",
    77     'desc' => "{BasePlugin.filename_encoding}",
    78     'type' => "enum",
    79     'deft' => "auto",
    80     'list' => $encoding_plus_auto_list,
    81     'reqd' => "no" }
    82      ];
     76    [  { 'name' => "block_exp",
     77     'desc' => "{BasePlugin.block_exp}",
     78     'type' => "regexp",
     79     'deft' => "",
     80     'reqd' => "no" },
     81       { 'name' => "no_blocking",
     82     'desc' => "{BasePlugin.no_blocking}",
     83     'type' => "flag",
     84     'reqd' => "no"},
     85       { 'name' => "filename_encoding",
     86     'desc' => "{BasePlugin.filename_encoding}",
     87     'type' => "enum",
     88     'deft' => "auto",
     89     'list' => $encoding_plus_auto_list,
     90     'reqd' => "no" }
     91    ];
    8392
    8493my $options = { 'name'     => "EncodingUtil",
     
    120129}
    121130
    122 
     131# converts raw filesystem filename to perl unicode format
     132sub raw_filename_to_unicode {
     133    my $self = shift (@_);
     134    my ($file) = @_;
     135
     136    my $unicode_file = "";
     137    ### need it in perl unicode, not raw filesystem
     138    my $filename_encoding =  $self->guess_filesystem_encoding(); 
     139       
     140    # copied this from set_Source_metadata in BasePlugin
     141    if ((defined $filename_encoding) && ($filename_encoding ne "ascii")) {
     142    # Use filename_encoding to map raw filename to a Perl unicode-aware string
     143    $unicode_file = decode($filename_encoding,$file);       
     144    }
     145    else {
     146    # otherwise generate %xx encoded version of filename for char > 127
     147    $unicode_file = &unicode::raw_filename_to_url_encoded($file);
     148    }
     149    return $unicode_file;
     150
     151}
    123152# just converts path as is to utf8.
    124153sub filepath_to_utf8 {
     
    544573}
    545574
     575sub block_raw_filename {
     576
     577    my $self = shift (@_);
     578    my ($block_hash,$filename_full_path) = @_;
     579
     580    my $unicode_filename = $self->raw_filename_to_unicode($filename_full_path);
     581    return $self->block_filename($block_hash, $unicode_filename);
     582}
     583
     584# block unicode string filename
     585sub block_filename
     586{
     587    my $self = shift (@_);
     588    my ($block_hash,$filename_full_path) = @_;
     589    print STDERR "in block filename $filename_full_path\n";
     590    print STDERR &unicode::debug_unicode_string($filename_full_path)."\n";
     591     
     592    if (($ENV{'GSDLOS'} =~ m/^windows$/) && ($^O ne "cygwin")) {
     593       # block hash contains long names, lets make sure that we were passed a long name
     594       $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path);
     595       # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
     596       my $lower_filename_full_path = lc($filename_full_path);
     597       $block_hash->{'file_blocks'}->{$lower_filename_full_path} = 1;
     598   
     599    }
     600    elsif ($ENV{'GSDLOS'} =~ m/^darwin$/) {
     601    # we need to normalize the filenames
     602        my $composed_filename_full_path = normalize('C', $filename_full_path);
     603        print STDERR "darwin, composed filename =". &unicode::debug_unicode_string($composed_filename_full_path)."\n";
     604        $block_hash->{'file_blocks'}->{$composed_filename_full_path} = 1;
     605   }
     606 
     607    else {
     608    $block_hash->{'file_blocks'}->{$filename_full_path} = 1;
     609    }
     610}
     611
     612
     613# filename is raw filesystem name
     614sub raw_file_is_blocked {
     615     my $self = shift (@_);
     616     my ($block_hash, $filename_full_path) = @_;
     617
     618     my $unicode_filename_full_path = $self->raw_filename_to_unicode($filename_full_path);
     619     return $self->file_is_blocked($block_hash, $unicode_filename_full_path);
     620}
     621
     622# filename must be perl unicode string
     623sub file_is_blocked {
     624    my $self = shift (@_);
     625    my ($block_hash, $filename_full_path) = @_;
     626
     627    #
     628    print STDERR "in file is blocked $filename_full_path\n";
     629    print STDERR &unicode::debug_unicode_string($filename_full_path)."\n";
     630    if (($ENV{'GSDLOS'} =~ m/^windows$/) && ($^O ne "cygwin")) {
     631    # convert to long filenames if needed
     632    $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path);
     633    # all block paths are lowercased.
     634    my $lower_filename = lc ($filename_full_path);
     635    if (defined $block_hash->{'file_blocks'}->{$lower_filename}) {
     636        $self->{'num_blocked'} ++;
     637        return 1;
     638    }
     639    }
     640    else {
     641    if (defined $block_hash->{'file_blocks'}->{$filename_full_path}) {
     642        $self->{'num_blocked'} ++;
     643        print STDERR "BLOCKED\n";
     644        return 1;
     645    }
     646    }
     647    # check Directory plugin's own block_exp
     648    if ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) {
     649    $self->{'num_blocked'} ++;
     650    return 1; # blocked
     651    }
     652    print STDERR "NOT BLOCKED\n";
     653    return 0;
     654}
     655
     656
    5466571;
    547658
Note: See TracChangeset for help on using the changeset viewer.