Changeset 16391 for gsdl


Ignore:
Timestamp:
2008-07-14T14:56:43+12:00 (16 years ago)
Author:
kjdon
Message:

global block pass: this plugin now does the blocking - when reading through a directory, it checks each filename against the block_hash to see if it's been recorded or not. If it has, then it's blocked and not passed on to the plugin pipeline.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/DirectoryPlugin.pm

    r15870 r16391  
    8888    die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
    8989    }
    90        
     90   
    9191    $self->{'subdir_extrametakeys'} = {};
    9292
     
    135135}
    136136
    137 # return number of files processed, undef if can't process
    138 # Note that $base_dir might be "" and that $file might
    139 # include directories
    140 
    141 # This function passes around metadata hash structures.  Metadata hash
    142 # structures are hashes that map from a (scalar) key (the metadata element
    143 # name) to either a scalar metadata value or a reference to an array of
    144 # such values.
    145 
    146 sub read {
    147     my $self = shift (@_);
    148     my ($pluginfo, $base_dir, $file, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    149    
     137sub check_directory_path {
     138
     139    my $self = shift(@_);
     140    my ($dirname) = @_;
     141   
     142    return undef unless (-d $dirname);
     143
     144    return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/);
     145
    150146    my $outhandle = $self->{'outhandle'};
    151     my $verbosity = $self->{'verbosity'};
    152  
    153     # Calculate the directory name and ensure it is a directory and
    154     # that it is not explicitly blocked.
    155     my $dirname = $file;
    156     $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    157     return undef unless (-d $dirname);
    158     return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/);
    159 
     147   
    160148    # check to make sure we're not reading the archives or index directory
    161149    my $gsdlhome = quotemeta($ENV{'GSDLHOME'});
     
    176164    return 0;
    177165    }
    178    
    179     if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
    180         print $outhandle "DirectoryPlugin: metadata passed in: ",
    181     join(", ", keys %$in_metadata), "\n";
    182     }
    183    
    184     # Recur over directory contents.
     166
     167    return 1;
     168}
     169
     170# this may be called more than once
     171sub sort_out_associated_files {
     172
     173    my $self = shift (@_);
     174    my ($block_hash) = @_;
     175    if (!scalar (keys %{$block_hash->{'shared_fileroot'}})) {
     176    return;
     177    }
     178
     179    $self->{'assocfile_info'} = {} unless defined $self->{'assocfile_info'};
     180    my $metadata = $self->{'assocfile_info'};
     181    foreach my $prefix (keys %{$block_hash->{'shared_fileroot'}}) {
     182    my $record = $block_hash->{'shared_fileroot'}->{$prefix};
     183
     184    my $tie_to = $record->{'tie_to'};
     185    my $exts = $record->{'exts'};
     186   
     187    if ((defined $tie_to) && (scalar (keys %$exts) > 0)) {
     188        # set up fileblocks and assocfile_tobe
     189        my $base_file = "$prefix$tie_to";
     190        $metadata->{$base_file} = {} unless defined $metadata->{$base_file};
     191        my $base_file_metadata = $metadata->{$base_file};
     192       
     193        $base_file_metadata->{'gsdlassocfile_tobe'} = [] unless defined $base_file_metadata->{'gsdlassocfile_tobe'};
     194        my $assoc_tobe = $base_file_metadata->{'gsdlassocfile_tobe'};
     195        foreach my $e (keys %$exts) {
     196        # block the file
     197        $block_hash->{'file_blocks'}->{"$prefix$e"} = 1;
     198        # set up as an associatd file
     199        print STDERR "  $self->{'plugin_type'}: Associating $prefix$e with $tie_to version\n";
     200        my $mime_type = ""; # let system auto detect this
     201        push(@$assoc_tobe,"$prefix$e:$mime_type:");
     202
     203        }
     204    }
     205    } # foreach record
     206
     207    $block_hash->{'shared_fileroot'} = undef;
     208    $block_hash->{'shared_fileroot'} = {};
     209
     210}
     211
     212
     213# do block exp OR special blocking ???
     214
     215sub file_is_blocked {
     216    my $self = shift (@_);
     217    my ($block_hash, $filename_full_path) = @_;
     218
     219    if (defined $block_hash->{'file_blocks'}->{$filename_full_path}) {
     220    $self->{'num_blocked'} ++;
     221    return 1;
     222    }
     223    # check Directory plugin's own block_exp
     224    if ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) {
     225    $self->{'num_blocked'} ++;
     226    return 1; # blocked
     227    }
     228    return 0;
     229}
     230
     231
     232
     233sub file_block_read {
     234    my $self = shift (@_);
     235    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;
     236
     237    my $outhandle = $self->{'outhandle'};
     238    my $verbosity = $self->{'verbosity'};
     239   
     240    # Calculate the directory name and ensure it is a directory and
     241    # that it is not explicitly blocked.
     242    my $dirname = $file;
     243    $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
     244
     245    my $directory_ok = $self->check_directory_path($dirname);
     246    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);
     247
     248    $block_hash->{'file_blocks'} = {} unless defined $block_hash->{'file_blocks'};
     249    $block_hash->{'shared_fileroot'} = {} unless defined $block_hash->{'shared_fileroot'};
     250
     251     # Recur over directory contents.
    185252    my (@dir, $subfile);
    186     my $count = 0;
    187    
    188     print $outhandle "DirectoryPlugin: getting directory $dirname\n" if ($verbosity);
     253    #my $count = 0;
     254   
     255    print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2);
    189256   
    190257    # find all the files in the directory
     
    198265    @dir = readdir (DIR);
    199266    closedir (DIR);
     267   
     268    for (my $i = 0; $i < scalar(@dir); $i++) {
     269    my $subfile = $dir[$i];
     270    my $this_file_base_dir = $base_dir;
     271    next if ($subfile =~ m/^\.\.?$/);
     272
     273    # Recursively read each $subfile
     274    print $outhandle "DirectoryPlugin block recurring: $subfile\n" if ($verbosity > 2);
     275   
     276    #$count += &plugin::file_block_read ($pluginfo, $this_file_base_dir,
     277    &plugin::file_block_read ($pluginfo, $this_file_base_dir,
     278                  &util::filename_cat($file, $subfile),
     279                  $block_hash, $metadata, $gli);
     280   
     281    }
     282    $self->sort_out_associated_files($block_hash);
     283    #return $count;
     284   
     285}
     286# return number of files processed, undef if can't process
     287# Note that $base_dir might be "" and that $file might
     288# include directories
     289
     290# This function passes around metadata hash structures.  Metadata hash
     291# structures are hashes that map from a (scalar) key (the metadata element
     292# name) to either a scalar metadata value or a reference to an array of
     293# such values.
     294
     295sub read {
     296    my $self = shift (@_);
     297    my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     298   
     299    my $outhandle = $self->{'outhandle'};
     300    my $verbosity = $self->{'verbosity'};
     301   
     302    # Calculate the directory name and ensure it is a directory and
     303    # that it is not explicitly blocked.
     304    my $dirname;
     305    if ($file eq "") {
     306    $dirname = $base_dir;
     307    } else {
     308    $dirname = $file;
     309    $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
     310    }
     311
     312    my $directory_ok = $self->check_directory_path($dirname);
     313    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);
     314       
     315    if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
     316        print $outhandle "DirectoryPlugin: metadata passed in: ",
     317    join(", ", keys %$in_metadata), "\n";
     318    }
     319   
     320
     321    # Recur over directory contents.
     322    my (@dir, $subfile);
     323    my $count = 0;
     324   
     325    print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2);
     326   
     327    # find all the files in the directory
     328    if (!opendir (DIR, $dirname)) {
     329    if ($gli) {
     330        print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
     331    }
     332    print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
     333    return -1; # error in processing
     334    }
     335    @dir = readdir (DIR);
     336    closedir (DIR);
    200337
    201338    # Re-order the files in the list so any directories ending with .all are moved to the end
     
    211348    my %extrametadata;               # maps from filespec to extra metadata keys
    212349    my @extrametakeys;               # keys of %extrametadata in order read
     350
    213351
    214352    my $os_dirsep = &util::get_os_dirsep();
     
    239377    last if ($maxdocs != -1 && $count >= $maxdocs);
    240378    next if ($subfile =~ m/^\.\.?$/);
    241 
     379    my $file_subfile = &util::filename_cat($file, $subfile);
     380    my $full_filename = &util::filename_cat($this_file_base_dir, $file_subfile);
     381    if ($self->file_is_blocked($block_hash,$full_filename)) {
     382        print STDERR "DirectoryPlugin: file $full_filename was blocked for metadata_read\n" if ($verbosity > 2);
     383        next;
     384    }
     385   
    242386    # Recursively read each $subfile
    243387    print $outhandle "DirectoryPlugin metadata recurring: $subfile\n" if ($verbosity > 2);
    244388   
    245389    $count += &plugin::metadata_read ($pluginfo, $this_file_base_dir,
    246                       &util::filename_cat($file, $subfile),
     390                      $file_subfile,$block_hash,
    247391                      $out_metadata, \@extrametakeys, \%extrametadata,
    248392                      $processor, $maxdocs, $gli);
    249393    $additionalmetadata = 1;
    250394    }
    251    
     395
    252396    # filter out any extrametakeys that mention subdirectories and store
    253397    # for later use (i.e. when that sub-directory is being processed)
     
    313457    next if ($subfile =~ /^\.\.?$/);
    314458
     459    my $file_subfile = &util::filename_cat($file, $subfile);
     460    my $full_filename
     461        = &util::filename_cat($this_file_base_dir,$file_subfile);
     462
     463    if ($self->file_is_blocked($block_hash,$full_filename)) {
     464        print STDERR "DirectoryPlugin: file $full_filename was blocked for read\n" if ($verbosity > 2);
     465        next;
     466    }
     467   
    315468    # Follow Windows shortcuts
    316469    if ($subfile =~ /(?i)\.lnk$/ && $ENV{'GSDLOS'} =~ /^windows$/i) {
     
    350503    &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);
    351504
     505    # check the assocfile_info
     506    if (defined $self->{'assocfile_info'}->{$full_filename}) {
     507        &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$full_filename});
     508    }
    352509        ## encode the filename as perl5 doesn't handle unicode filenames       
    353510        my $tmpfile = Encode::encode_utf8($subfile);
     
    369526
    370527
    371     my $file_subfile = &util::filename_cat($file, $subfile);
    372     my $filename_subfile
    373         = &util::filename_cat($this_file_base_dir,$file_subfile);
    374528    if (defined $self->{'inf_timestamp'}) {
    375529        my $inf_timestamp = $self->{'inf_timestamp'};
    376530
    377         if (! -d $filename_subfile) {
    378         my $filename_timestamp = -M $filename_subfile;
     531        if (! -d $full_filename) {
     532        my $filename_timestamp = -M $full_filename;
    379533        if ($filename_timestamp > $inf_timestamp) {
    380534            # filename has been around for longer than inf
     
    389543   
    390544    $count += &plugin::read ($pluginfo, $this_file_base_dir,
    391                  $file_subfile,
     545                 $file_subfile, $block_hash,
    392546                 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
    393547    }
Note: See TracChangeset for help on using the changeset viewer.