Ignore:
Timestamp:
2004-07-01T14:48:55+12:00 (20 years ago)
Author:
mdewsnip
Message:

First cut at upgrading the CDS/ISIS plugin to obtain and index documents specified in the database (for the UNESCO contract).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/RecPlug.pm

    r7362 r7686  
    115115    'desc' => "{RecPlug.use_metadata_files}",
    116116    'type' => "flag",
     117    'reqd' => "no" },
     118      { 'name' => "recheck_directories",
     119    'desc' => "{RecPlug.recheck_directories}",
     120    'type' => "flag",
    117121    'reqd' => "no" } ];
    118122
     
    137141    if (!parsargv::parse(\@_,
    138142             q^use_metadata_files^, \$self->{'use_metadata_files'},
     143             q^recheck_directories^, \$self->{'recheck_directories'},
    139144             "allow_extra_options")) {
    140145    print STDERR "\nRecPlug uses an incorrect option.\n";
     
    232237    @dir = readdir (DIR);
    233238    closedir (DIR);
    234    
     239
     240    # Re-order the files in the list so any directories ending with .all are moved to the end
     241    for ($i = scalar(@dir) - 1; $i >= 0; $i--) {
     242    if (-d $dir[$i] && $dir[$i] =~ /\.all$/) {
     243        push(@dir, splice(@dir, $i, 1));
     244    }
     245    }
     246
    235247    # read XML metadata files (if supplied)
    236248    my $additionalmetadata = 0;      # is there extra metadata available?
     
    252264    # import each of the files in the directory
    253265    my $out_metadata;
    254     foreach $subfile (@dir) {
    255        
     266    my $num_files = scalar(@dir);
     267    for (my $i = 0; $i <= scalar(@dir); $i++) {
     268    # When every file in the directory has been done, pause for a moment (figuratively!)
     269    # If the -recheck_directories argument hasn't been provided, stop now (default)
     270    # Otherwise, re-read the contents of the directory to check for new files
     271    #   Any new files are added to the @dir list and are processed as normal
     272    #   This is necessary when documents to be indexed are specified in bibliographic DBs
     273    #   These files are copied/downloaded and stored in a new folder at import time
     274    if ($i == $num_files) {
     275        last unless $self->{'recheck_directories'};
     276
     277        # Re-read the files in the directory to see if there are any new files
     278        last if (!opendir (DIR, $dirname));
     279        my @dirnow = readdir (DIR);
     280        closedir (DIR);
     281
     282        # We're only interested if there are more files than there were before
     283        last if (scalar(@dirnow) <= scalar(@dir));
     284
     285        # Any new files are added to the end of @dir to get processed by the loop
     286        foreach my $subfilenow (@dirnow) {
     287        for ($j = 0; $j < $num_files; $j++) {
     288            last if ($subfilenow eq $dir[$j]);
     289        }
     290        if ($j == $num_files) {
     291            # New file
     292            push(@dir, $subfilenow);
     293        }
     294        }
     295
     296        # When the new files have been processed, check again
     297        $num_files = scalar(@dir);
     298    }
     299
     300    my $subfile = $dir[$i];
    256301    last if ($maxdocs != -1 && $count >= $maxdocs);
    257302    next if ($subfile =~ /^\.\.?$/);
     
    304349                 $out_metadata, $processor, $maxdocs, $gli);
    305350    }
    306     return $count;
    307    
     351
     352    return $count;   
    308353}
    309354
Note: See TracChangeset for help on using the changeset viewer.