Changeset 16391

Show
Ignore:
Timestamp:
14.07.2008 14:56:43 (11 years ago)
Author:
kjdon
Message:

global block pass: this plugin now does the blocking - when reading through a directory, it checks each filename against the block_hash to see if it's been recorded or not. If it has, then it's blocked and not passed on to the plugin pipeline.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/DirectoryPlugin.pm

    r15870 r16391  
    8888    die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n"; 
    8989    } 
    90          
     90     
    9191    $self->{'subdir_extrametakeys'} = {}; 
    9292 
     
    135135} 
    136136 
    137 # return number of files processed, undef if can't process 
    138 # Note that $base_dir might be "" and that $file might  
    139 # include directories 
    140  
    141 # This function passes around metadata hash structures.  Metadata hash 
    142 # structures are hashes that map from a (scalar) key (the metadata element 
    143 # name) to either a scalar metadata value or a reference to an array of 
    144 # such values. 
    145  
    146 sub read { 
    147     my $self = shift (@_); 
    148     my ($pluginfo, $base_dir, $file, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    149      
     137sub check_directory_path { 
     138 
     139    my $self = shift(@_); 
     140    my ($dirname) = @_; 
     141     
     142    return undef unless (-d $dirname); 
     143 
     144    return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/); 
     145 
    150146    my $outhandle = $self->{'outhandle'}; 
    151     my $verbosity = $self->{'verbosity'}; 
    152   
    153     # Calculate the directory name and ensure it is a directory and 
    154     # that it is not explicitly blocked. 
    155     my $dirname = $file; 
    156     $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 
    157     return undef unless (-d $dirname); 
    158     return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/); 
    159  
     147     
    160148    # check to make sure we're not reading the archives or index directory 
    161149    my $gsdlhome = quotemeta($ENV{'GSDLHOME'}); 
     
    176164    return 0; 
    177165    } 
    178      
    179     if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) { 
    180         print $outhandle "DirectoryPlugin: metadata passed in: ",  
    181     join(", ", keys %$in_metadata), "\n"; 
    182     } 
    183      
    184     # Recur over directory contents. 
     166 
     167    return 1; 
     168} 
     169 
     170# this may be called more than once 
     171sub sort_out_associated_files { 
     172 
     173    my $self = shift (@_); 
     174    my ($block_hash) = @_; 
     175    if (!scalar (keys %{$block_hash->{'shared_fileroot'}})) { 
     176    return; 
     177    } 
     178 
     179    $self->{'assocfile_info'} = {} unless defined $self->{'assocfile_info'}; 
     180    my $metadata = $self->{'assocfile_info'}; 
     181    foreach my $prefix (keys %{$block_hash->{'shared_fileroot'}}) { 
     182    my $record = $block_hash->{'shared_fileroot'}->{$prefix}; 
     183 
     184    my $tie_to = $record->{'tie_to'}; 
     185    my $exts = $record->{'exts'}; 
     186     
     187    if ((defined $tie_to) && (scalar (keys %$exts) > 0)) { 
     188        # set up fileblocks and assocfile_tobe 
     189        my $base_file = "$prefix$tie_to"; 
     190        $metadata->{$base_file} = {} unless defined $metadata->{$base_file}; 
     191        my $base_file_metadata = $metadata->{$base_file}; 
     192         
     193        $base_file_metadata->{'gsdlassocfile_tobe'} = [] unless defined $base_file_metadata->{'gsdlassocfile_tobe'}; 
     194        my $assoc_tobe = $base_file_metadata->{'gsdlassocfile_tobe'}; 
     195        foreach my $e (keys %$exts) { 
     196        # block the file 
     197        $block_hash->{'file_blocks'}->{"$prefix$e"} = 1; 
     198        # set up as an associated file 
     199        print STDERR "  $self->{'plugin_type'}: Associating $prefix$e with $tie_to version\n"; 
     200        my $mime_type = ""; # let system auto detect this 
     201        push(@$assoc_tobe,"$prefix$e:$mime_type:");  
     202 
     203        } 
     204    } 
     205    } # foreach record 
     206 
     207    $block_hash->{'shared_fileroot'} = undef; 
     208    $block_hash->{'shared_fileroot'} = {}; 
     209 
     210} 
     211 
     212 
     213# do block exp OR special blocking ??? 
     214 
     215sub file_is_blocked { 
     216    my $self = shift (@_); 
     217    my ($block_hash, $filename_full_path) = @_; 
     218 
     219    if (defined $block_hash->{'file_blocks'}->{$filename_full_path}) { 
     220    $self->{'num_blocked'} ++; 
     221    return 1; 
     222    } 
     223    # check Directory plugin's own block_exp  
     224    if ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) { 
     225    $self->{'num_blocked'} ++; 
     226    return 1; # blocked 
     227    } 
     228    return 0; 
     229} 
     230 
     231 
     232 
     233sub file_block_read { 
     234    my $self = shift (@_); 
     235    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_; 
     236 
     237    my $outhandle = $self->{'outhandle'}; 
     238    my $verbosity = $self->{'verbosity'}; 
     239     
     240    # Calculate the directory name and ensure it is a directory and 
     241    # that it is not explicitly blocked. 
     242    my $dirname = $file; 
     243    $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 
     244 
     245    my $directory_ok = $self->check_directory_path($dirname); 
     246    return $directory_ok unless (defined $directory_ok && $directory_ok == 1); 
     247 
     248    $block_hash->{'file_blocks'} = {} unless defined $block_hash->{'file_blocks'}; 
     249    $block_hash->{'shared_fileroot'} = {} unless defined $block_hash->{'shared_fileroot'}; 
     250 
     251     # Recur over directory contents. 
    185252    my (@dir, $subfile); 
    186     my $count = 0; 
    187      
    188     print $outhandle "DirectoryPlugin: getting directory $dirname\n" if ($verbosity); 
     253    #my $count = 0; 
     254     
     255    print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2); 
    189256     
    190257    # find all the files in the directory 
     
    198265    @dir = readdir (DIR); 
    199266    closedir (DIR); 
     267     
     268    for (my $i = 0; $i < scalar(@dir); $i++) { 
     269    my $subfile = $dir[$i]; 
     270    my $this_file_base_dir = $base_dir; 
     271    next if ($subfile =~ m/^\.\.?$/); 
     272 
     273    # Recursively read each $subfile 
     274    print $outhandle "DirectoryPlugin block recurring: $subfile\n" if ($verbosity > 2); 
     275     
     276    #$count += &plugin::file_block_read ($pluginfo, $this_file_base_dir, 
     277    &plugin::file_block_read ($pluginfo, $this_file_base_dir, 
     278                  &util::filename_cat($file, $subfile), 
     279                  $block_hash, $metadata, $gli); 
     280     
     281    } 
     282    $self->sort_out_associated_files($block_hash); 
     283    #return $count; 
     284     
     285} 
     286# return number of files processed, undef if can't process 
     287# Note that $base_dir might be "" and that $file might  
     288# include directories 
     289 
     290# This function passes around metadata hash structures.  Metadata hash 
     291# structures are hashes that map from a (scalar) key (the metadata element 
     292# name) to either a scalar metadata value or a reference to an array of 
     293# such values. 
     294 
     295sub read { 
     296    my $self = shift (@_); 
     297    my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     298     
     299    my $outhandle = $self->{'outhandle'}; 
     300    my $verbosity = $self->{'verbosity'}; 
     301     
     302    # Calculate the directory name and ensure it is a directory and 
     303    # that it is not explicitly blocked. 
     304    my $dirname; 
     305    if ($file eq "") { 
     306    $dirname = $base_dir; 
     307    } else { 
     308    $dirname = $file; 
     309    $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 
     310    } 
     311 
     312    my $directory_ok = $self->check_directory_path($dirname); 
     313    return $directory_ok unless (defined $directory_ok && $directory_ok == 1); 
     314         
     315    if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) { 
     316        print $outhandle "DirectoryPlugin: metadata passed in: ",  
     317    join(", ", keys %$in_metadata), "\n"; 
     318    } 
     319     
     320 
     321    # Recur over directory contents. 
     322    my (@dir, $subfile); 
     323    my $count = 0; 
     324     
     325    print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2); 
     326     
     327    # find all the files in the directory 
     328    if (!opendir (DIR, $dirname)) { 
     329    if ($gli) { 
     330        print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n"; 
     331    } 
     332    print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n"; 
     333    return -1; # error in processing 
     334    } 
     335    @dir = readdir (DIR); 
     336    closedir (DIR); 
    200337 
    201338    # Re-order the files in the list so any directories ending with .all are moved to the end 
     
    211348    my %extrametadata;               # maps from filespec to extra metadata keys 
    212349    my @extrametakeys;               # keys of %extrametadata in order read 
     350 
    213351 
    214352    my $os_dirsep = &util::get_os_dirsep(); 
     
    239377    last if ($maxdocs != -1 && $count >= $maxdocs); 
    240378    next if ($subfile =~ m/^\.\.?$/); 
    241  
     379    my $file_subfile = &util::filename_cat($file, $subfile); 
     380    my $full_filename = &util::filename_cat($this_file_base_dir, $file_subfile); 
     381    if ($self->file_is_blocked($block_hash,$full_filename)) { 
     382        print STDERR "DirectoryPlugin: file $full_filename was blocked for metadata_read\n" if ($verbosity > 2); 
     383        next; 
     384    } 
     385     
    242386    # Recursively read each $subfile 
    243387    print $outhandle "DirectoryPlugin metadata recurring: $subfile\n" if ($verbosity > 2); 
    244388     
    245389    $count += &plugin::metadata_read ($pluginfo, $this_file_base_dir, 
    246                       &util::filename_cat($file, $subfile), 
     390                      $file_subfile,$block_hash, 
    247391                      $out_metadata, \@extrametakeys, \%extrametadata, 
    248392                      $processor, $maxdocs, $gli); 
    249393    $additionalmetadata = 1; 
    250394    } 
    251     
     395 
    252396    # filter out any extrametakeys that mention subdirectories and store 
    253397    # for later use (i.e. when that sub-directory is being processed) 
     
    313457    next if ($subfile =~ /^\.\.?$/); 
    314458 
     459    my $file_subfile = &util::filename_cat($file, $subfile); 
     460    my $full_filename  
     461        = &util::filename_cat($this_file_base_dir,$file_subfile); 
     462 
     463    if ($self->file_is_blocked($block_hash,$full_filename)) { 
     464        print STDERR "DirectoryPlugin: file $full_filename was blocked for read\n" if ($verbosity > 2); 
     465        next; 
     466    } 
     467     
    315468    # Follow Windows shortcuts 
    316469    if ($subfile =~ /(?i)\.lnk$/ && $ENV{'GSDLOS'} =~ /^windows$/i) { 
     
    350503    &metadatautil::combine_metadata_structures($out_metadata, $in_metadata); 
    351504 
     505    # check the assocfile_info 
     506    if (defined $self->{'assocfile_info'}->{$full_filename}) { 
     507        &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$full_filename}); 
     508    } 
    352509        ## encode the filename as perl5 doesn't handle unicode filenames        
    353510        my $tmpfile = Encode::encode_utf8($subfile);  
     
    369526 
    370527 
    371     my $file_subfile = &util::filename_cat($file, $subfile); 
    372     my $filename_subfile  
    373         = &util::filename_cat($this_file_base_dir,$file_subfile); 
    374528    if (defined $self->{'inf_timestamp'}) { 
    375529        my $inf_timestamp = $self->{'inf_timestamp'}; 
    376530 
    377         if (! -d $filename_subfile) { 
    378         my $filename_timestamp = -M $filename_subfile; 
     531        if (! -d $full_filename) { 
     532        my $filename_timestamp = -M $full_filename; 
    379533        if ($filename_timestamp > $inf_timestamp) { 
    380534            # filename has been around for longer than inf 
     
    389543     
    390544    $count += &plugin::read ($pluginfo, $this_file_base_dir, 
    391                  $file_subfile, 
     545                 $file_subfile, $block_hash, 
    392546                 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli); 
    393547    }