Ignore:
Timestamp:
2008-06-05T09:24:02+12:00 (16 years ago)
Author:
kjdon
Message:

Plugin overhaul: the ArchivesInf and Directory plugins are not true plugins, as they don't process a file during import. I'd like to get rid of them altogether and make them part of the import/build scripts. In the meantime they are still here, and inherit from AbstractPlugin rather than BasePlugin, as they don't need all the options that BasePlugin provides.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/DirectoryPlugin.pm

    r15865 r15870  
    11###########################################################################
    22#
    3 # RecPlug.pm --
     3# DirectoryPlugin.pm --
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2424###########################################################################
    2525
    26 # RecPlug is a plugin which recurses through directories processing
    27 # each file it finds.
    28 
    29 # RecPlug has one option: use_metadata_files.  When this is set, it will
    30 # check each directory for an XML file called "metadata.xml" that specifies
    31 # metadata for the files (and subdirectories) in the directory.
    32 #
    33 # Here's an example of a metadata file that uses three FileSet structures
    34 # (ignore the # characters):
    35 
    36 #<?xml version="1.0" encoding="UTF-8" standalone="no"?>
    37 #<!DOCTYPE DirectoryMetadata SYSTEM "http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd">
    38 #<DirectoryMetadata>
    39 #  <FileSet>
    40 #    <FileName>nugget.*</FileName>
    41 #    <Description>
    42 #      <Metadata name="Title">Nugget Point, The Catlins</Metadata>
    43 #      <Metadata name="Place" mode="accumulate">Nugget Point</Metadata>
    44 #    </Description>
    45 #  </FileSet>
    46 #  <FileSet>
    47 #    <FileName>nugget-point-1.jpg</FileName>
    48 #    <Description>
    49 #      <Metadata name="Title">Nugget Point Lighthouse, The Catlins</Metadata>
    50 #      <Metadata name="Subject">Lighthouse</Metadata>
    51 #    </Description>
    52 #  </FileSet>
    53 #  <FileSet>
    54 #    <FileName>kaka-point-dir</FileName>
    55 #    <Description>
    56 #      <Metadata name="Title">Kaka Point, The Catlins</Metadata>
    57 #    </Description>
    58 #  </FileSet>
    59 #</DirectoryMetadata>
    60 
    61 # Metadata elements are read and applied to files in the order they appear
    62 # in the file.
    63 #
    64 # The FileName element describes the subfiles in the directory that the
    65 # metadata applies to as a perl regular expression (a FileSet group may
    66 # contain multiple FileName elements). So, <FileName>nugget.*</FileName>
    67 # indicates that the metadata records in the following Description block
    68 # apply to every subfile that starts with "nugget".  For these files, a
    69 # Title metadata element is set, overriding any old value that the Title
    70 # might have had.
    71 #
    72 # Occasionally, we want to have multiple metadata values applied to a
    73 # document; in this case we use the "mode=accumulate" attribute of the
    74 # particular Metadata element.  In the second metadata element of the first
    75 # FileSet above, the "Place" metadata is accumulating, and may therefore be
    76 # given several values.  If we wanted to override these values and use a
    77 # single metadata element again, we could set the mode attribute to
    78 # "override" instead.  Remember: every element is assumed to be in override
    79 # mode unless you specify otherwise, so if you want to accumulate metadata
    80 # for some field, every occurrence must have "mode=accumulate" specified.
    81 #
    82 # The second FileSet element above applies to a specific file, called
    83 # nugget-point-1.jpg.  This element overrides the Title metadata set in the
    84 # first FileSet, and adds a "Subject" metadata field.
    85 #
    86 # The third and final FileSet sets metadata for a subdirectory rather than
    87 # a file.  The metadata specified (a Title) will be passed into the
    88 # subdirectory and applied to every file that occurs in the subdirectory
    89 # (and to every subsubdirectory and its contents, and so on) unless the
    90 # metadata is explicitly overridden later in the import.
    91 
    92 
    93 
    94 package RecPlug;
    95 
    96 use BasPlug;
     26# DirectoryPlugin is a plugin which recurses through directories processing
     27# each file it finds - which basically means passing it down the plugin
     28# pipeline
     29
     30package DirectoryPlugin;
     31
     32use AbstractPlugin;
    9733use plugin;
    9834use util;
     
    10238use strict;
    10339no strict 'refs';
     40no strict 'subs';
     41
    10442use Encode;
    10543
    10644BEGIN {
    107     @RecPlug::ISA = ('BasPlug');
     45    @DirectoryPlugin::ISA = ('AbstractPlugin');
    10846}
    10947
    11048my $arguments =
    11149    [ { 'name' => "block_exp",
    112     'desc' => "{BasPlug.block_exp}",
     50    'desc' => "{BasePlugin.block_exp}",
    11351    'type' => "regexp",
    11452    'deft' => &get_default_block_exp(),
     
    11654      # this option has been deprecated. leave it here for now so we can warn people not to use it
    11755      { 'name' => "use_metadata_files",
    118     'desc' => "{RecPlug.use_metadata_files}",
     56    'desc' => "{DirectoryPlugin.use_metadata_files}",
    11957    'type' => "flag",
    12058    'reqd' => "no",
    12159    'hiddengli' => "yes" },
    12260      { 'name' => "recheck_directories",
    123     'desc' => "{RecPlug.recheck_directories}",
     61    'desc' => "{DirectoryPlugin.recheck_directories}",
    12462    'type' => "flag",
    12563    'reqd' => "no" } ];
    12664   
    127 my $options = { 'name'     => "RecPlug",
    128         'desc'     => "{RecPlug.desc}",
     65my $options = { 'name'     => "DirectoryPlugin",
     66        'desc'     => "{DirectoryPlugin.desc}",
    12967        'abstract' => "no",
    13068        'inherits' => "yes",
     
    13674    push(@$pluginlist, $class);
    13775
    138     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    139     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    140 
    141     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     76    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     77    push(@{$hashArgOptLists->{"OptList"}},$options);
     78
     79    my $self = new AbstractPlugin($pluginlist, $inputargs, $hashArgOptLists);
    14280   
    14381    if ($self->{'info_only'}) {
     
    14886    # we have left this option in so we can warn people who are still using it
    14987    if ($self->{'use_metadata_files'}) {
    150     die "ERROR: RecPlug -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
     88    die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
    15189    }
    15290       
     
    174112    }
    175113
    176     $self->SUPER::begin($pluginfo, $base_dir, $processor, $maxdocs);
    177 }
    178 
    179 
     114    #$self->SUPER::begin($pluginfo, $base_dir, $processor, $maxdocs);
     115}
     116
     117sub end {
     118
     119}
     120
     121sub deinit {
     122
     123}
    180124# return 1 if this class might recurse using $pluginfo
    181125sub is_recursive {
     
    206150    my $outhandle = $self->{'outhandle'};
    207151    my $verbosity = $self->{'verbosity'};
    208     my $read_metadata_files = $self->{'use_metadata_files'};
    209 
     152 
    210153    # Calculate the directory name and ensure it is a directory and
    211154    # that it is not explicitly blocked.
     
    218161    my $gsdlhome = quotemeta($ENV{'GSDLHOME'});
    219162    if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/) {
    220     print $outhandle "RecPlug: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
     163    print $outhandle "DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
    221164        return 0;
    222165    }
     
    224167    # check to see we haven't got a cyclic path...
    225168    if ($dirname =~ m%(/.*){,41}%) {
    226     print $outhandle "RecPlug: $dirname is 40 directories deep, is this a recursive path? if not increase constant in RecPlug.pm.\n";
     169    print $outhandle "DirectoryPlugin: $dirname is 40 directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n";
    227170    return 0;
    228171    }
     
    230173    # check to see we haven't got a cyclic path...
    231174    if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
    232     print $outhandle "RecPlug: $dirname appears to be in a recursive loop...\n";
     175    print $outhandle "DirectoryPlugin: $dirname appears to be in a recursive loop...\n";
    233176    return 0;
    234177    }
    235178   
    236179    if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
    237         print $outhandle "RecPlug: metadata passed in: ",
     180        print $outhandle "DirectoryPlugin: metadata passed in: ",
    238181    join(", ", keys %$in_metadata), "\n";
    239182    }
     
    243186    my $count = 0;
    244187   
    245     print $outhandle "RecPlug: getting directory $dirname\n" if ($verbosity);
     188    print $outhandle "DirectoryPlugin: getting directory $dirname\n" if ($verbosity);
    246189   
    247190    # find all the files in the directory
     
    250193        print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
    251194    }
    252     print $outhandle "RecPlug: WARNING - couldn't read directory $dirname\n";
     195    print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
    253196    return -1; # error in processing
    254197    }
     
    298241
    299242    # Recursively read each $subfile
    300     print $outhandle "RecPlug metadata recurring: $subfile\n" if ($verbosity > 2);
     243    print $outhandle "DirectoryPlugin metadata recurring: $subfile\n" if ($verbosity > 2);
    301244   
    302245    $count += &plugin::metadata_read ($pluginfo, $this_file_base_dir,
     
    389332        if (!defined ($linkdest)) {
    390333        # system error - file not found?
    391         warn "RecPlug: symlink problem - $!";
     334        warn "DirectoryPlugin: symlink problem - $!";
    392335        } else {
    393336        # see if link points to current or a parent directory
    394337        if ($linkdest =~ m@^[\./\\]+$@ ||
    395338            index($dirname, $linkdest) != -1) {
    396             warn "RecPlug: Ignoring recursive symlink ($dirname/$subfile -> $linkdest)\n";
     339            warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$subfile -> $linkdest)\n";
    397340            next;
    398341            ;
     
    401344    }
    402345
    403     print $outhandle "RecPlug: preparing metadata for $subfile\n" if ($verbosity > 2);
     346    print $outhandle "DirectoryPlugin: preparing metadata for $subfile\n" if ($verbosity > 2);
    404347
    405348    # Make a copy of $in_metadata to pass to $subfile
     
    443386
    444387    # Recursively read each $subfile
    445     print $outhandle "RecPlug recurring: $subfile\n" if ($verbosity > 2);
     388    print $outhandle "DirectoryPlugin recurring: $subfile\n" if ($verbosity > 2);
    446389   
    447390    $count += &plugin::read ($pluginfo, $this_file_base_dir,
Note: See TracChangeset for help on using the changeset viewer.