Changeset 31478

Show
Ignore:
Timestamp:
09.03.2017 14:33:20 (3 years ago)
Author:
kjdon
Message:

blocking stuff moved to here

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/EncodingUtil.pm

    r31456 r31478  
    3434 
    3535use PrintInfo; 
    36  
     36use Encode; 
    3737 
    3838BEGIN { 
     
    7474 
    7575my $arguments = 
    76      [ { 'name' => "filename_encoding", 
    77     'desc' => "{BasePlugin.filename_encoding}", 
    78     'type' => "enum", 
    79     'deft' => "auto", 
    80     'list' => $encoding_plus_auto_list, 
    81     'reqd' => "no" } 
    82      ]; 
     76    [  { 'name' => "block_exp", 
     77     'desc' => "{BasePlugin.block_exp}", 
     78     'type' => "regexp", 
     79     'deft' => "", 
     80     'reqd' => "no" }, 
     81       { 'name' => "no_blocking", 
     82     'desc' => "{BasePlugin.no_blocking}", 
     83     'type' => "flag", 
     84     'reqd' => "no"}, 
     85       { 'name' => "filename_encoding", 
     86     'desc' => "{BasePlugin.filename_encoding}", 
     87     'type' => "enum", 
     88     'deft' => "auto", 
     89     'list' => $encoding_plus_auto_list, 
     90     'reqd' => "no" } 
     91    ]; 
    8392 
    8493my $options = { 'name'     => "EncodingUtil", 
     
    120129} 
    121130 
    122  
     131# converts raw filesystem filename to perl unicode format 
     132sub raw_filename_to_unicode { 
     133    my $self = shift (@_); 
     134    my ($file) = @_; 
     135 
     136    my $unicode_file = ""; 
     137    ### need it in perl unicode, not raw filesystem 
     138    my $filename_encoding =  $self->guess_filesystem_encoding();   
     139         
     140    # copied this from set_Source_metadata in BasePlugin 
     141    if ((defined $filename_encoding) && ($filename_encoding ne "ascii")) { 
     142    # Use filename_encoding to map raw filename to a Perl unicode-aware string  
     143    $unicode_file = decode($filename_encoding,$file);        
     144    } 
     145    else { 
     146    # otherwise generate %xx encoded version of filename for char > 127 
     147    $unicode_file = &unicode::raw_filename_to_url_encoded($file); 
     148    } 
     149    return $unicode_file; 
     150 
     151} 
    123152# just converts path as is to utf8. 
    124153sub filepath_to_utf8 { 
     
    544573} 
    545574 
     575sub block_raw_filename { 
     576 
     577    my $self = shift (@_); 
     578    my ($block_hash,$filename_full_path) = @_; 
     579 
     580    my $unicode_filename = $self->raw_filename_to_unicode($filename_full_path); 
     581    return $self->block_filename($block_hash, $unicode_filename); 
     582} 
     583 
     584# block unicode string filename 
     585sub block_filename 
     586{ 
     587    my $self = shift (@_); 
     588    my ($block_hash,$filename_full_path) = @_; 
     589    print STDERR "in block filename $filename_full_path\n"; 
     590    print STDERR &unicode::debug_unicode_string($filename_full_path)."\n"; 
     591      
     592    if (($ENV{'GSDLOS'} =~ m/^windows$/) && ($^O ne "cygwin")) { 
     593       # block hash contains long names, lets make sure that we were passed a long name 
     594       $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path); 
     595       # lower case the entire thing, eg for cover.jpg when its actually cover.JPG 
     596       my $lower_filename_full_path = lc($filename_full_path); 
     597       $block_hash->{'file_blocks'}->{$lower_filename_full_path} = 1; 
     598     
     599    } 
     600    elsif ($ENV{'GSDLOS'} =~ m/^darwin$/) { 
     601    # we need to normalize the filenames 
     602        my $composed_filename_full_path = normalize('C', $filename_full_path); 
     603        print STDERR "darwin, composed filename =". &unicode::debug_unicode_string($composed_filename_full_path)."\n"; 
     604        $block_hash->{'file_blocks'}->{$composed_filename_full_path} = 1; 
     605   } 
     606  
     607    else { 
     608    $block_hash->{'file_blocks'}->{$filename_full_path} = 1; 
     609    } 
     610} 
     611 
     612 
     613# filename is raw filesystem name 
     614sub raw_file_is_blocked { 
     615     my $self = shift (@_); 
     616     my ($block_hash, $filename_full_path) = @_; 
     617 
     618     my $unicode_filename_full_path = $self->raw_filename_to_unicode($filename_full_path); 
     619     return $self->file_is_blocked($block_hash, $unicode_filename_full_path); 
     620} 
     621 
     622# filename must be perl unicode string 
     623sub file_is_blocked { 
     624    my $self = shift (@_); 
     625    my ($block_hash, $filename_full_path) = @_; 
     626 
     627    #  
     628    print STDERR "in file is blocked $filename_full_path\n"; 
     629    print STDERR &unicode::debug_unicode_string($filename_full_path)."\n"; 
     630    if (($ENV{'GSDLOS'} =~ m/^windows$/) && ($^O ne "cygwin")) { 
     631    # convert to long filenames if needed 
     632    $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path); 
     633    # all block paths are lowercased. 
     634    my $lower_filename = lc ($filename_full_path); 
     635    if (defined $block_hash->{'file_blocks'}->{$lower_filename}) { 
     636        $self->{'num_blocked'} ++; 
     637        return 1; 
     638    } 
     639    } 
     640    else { 
     641    if (defined $block_hash->{'file_blocks'}->{$filename_full_path}) { 
     642        $self->{'num_blocked'} ++; 
     643        print STDERR "BLOCKED\n"; 
     644        return 1; 
     645    } 
     646    } 
     647    # check Directory plugin's own block_exp  
     648    if ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) { 
     649    $self->{'num_blocked'} ++; 
     650    return 1; # blocked 
     651    } 
     652    print STDERR "NOT BLOCKED\n"; 
     653    return 0; 
     654} 
     655 
     656 
    5466571; 
    547658