Changeset 15870 for gsdl/trunk/perllib/plugins/DirectoryPlugin.pm
- Timestamp:
- 2008-06-05T09:24:02+12:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/DirectoryPlugin.pm
r15865 r15870 1 1 ########################################################################### 2 2 # 3 # RecPlug.pm --3 # DirectoryPlugin.pm -- 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 24 24 ########################################################################### 25 25 26 # RecPlug is a plugin which recurses through directories processing 27 # each file it finds. 28 29 # RecPlug has one option: use_metadata_files. When this is set, it will 30 # check each directory for an XML file called "metadata.xml" that specifies 31 # metadata for the files (and subdirectories) in the directory. 32 # 33 # Here's an example of a metadata file that uses three FileSet structures 34 # (ignore the # characters): 35 36 #<?xml version="1.0" encoding="UTF-8" standalone="no"?> 37 #<!DOCTYPE DirectoryMetadata SYSTEM "http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd"> 38 #<DirectoryMetadata> 39 # <FileSet> 40 # <FileName>nugget.*</FileName> 41 # <Description> 42 # <Metadata name="Title">Nugget Point, The Catlins</Metadata> 43 # <Metadata name="Place" mode="accumulate">Nugget Point</Metadata> 44 # </Description> 45 # </FileSet> 46 # <FileSet> 47 # <FileName>nugget-point-1.jpg</FileName> 48 # <Description> 49 # <Metadata name="Title">Nugget Point Lighthouse, The Catlins</Metadata> 50 # <Metadata name="Subject">Lighthouse</Metadata> 51 # </Description> 52 # </FileSet> 53 # <FileSet> 54 # <FileName>kaka-point-dir</FileName> 55 # <Description> 56 # <Metadata name="Title">Kaka Point, The Catlins</Metadata> 57 # </Description> 58 # </FileSet> 59 #</DirectoryMetadata> 60 61 # Metadata elements are read and applied to files in the order they appear 62 # in the file. 63 # 64 # The FileName element describes the subfiles in the directory that the 65 # metadata applies to as a perl regular expression (a FileSet group may 66 # contain multiple FileName elements). 
So, <FileName>nugget.*</FileName> 67 # indicates that the metadata records in the following Description block 68 # apply to every subfile that starts with "nugget". For these files, a 69 # Title metadata element is set, overriding any old value that the Title 70 # might have had. 71 # 72 # Occasionally, we want to have multiple metadata values applied to a 73 # document; in this case we use the "mode=accumulate" attribute of the 74 # particular Metadata element. In the second metadata element of the first 75 # FileSet above, the "Place" metadata is accumulating, and may therefore be 76 # given several values. If we wanted to override these values and use a 77 # single metadata element again, we could set the mode attribute to 78 # "override" instead. Remember: every element is assumed to be in override 79 # mode unless you specify otherwise, so if you want to accumulate metadata 80 # for some field, every occurrence must have "mode=accumulate" specified. 81 # 82 # The second FileSet element above applies to a specific file, called 83 # nugget-point-1.jpg. This element overrides the Title metadata set in the 84 # first FileSet, and adds a "Subject" metadata field. 85 # 86 # The third and final FileSet sets metadata for a subdirectory rather than 87 # a file. The metadata specified (a Title) will be passed into the 88 # subdirectory and applied to every file that occurs in the subdirectory 89 # (and to every subsubdirectory and its contents, and so on) unless the 90 # metadata is explicitly overridden later in the import. 
91 92 93 94 package RecPlug; 95 96 use BasPlug; 26 # DirectoryPlugin is a plugin which recurses through directories processing 27 # each file it finds - which basically means passing it down the plugin 28 # pipeline 29 30 package DirectoryPlugin; 31 32 use AbstractPlugin; 97 33 use plugin; 98 34 use util; … … 102 38 use strict; 103 39 no strict 'refs'; 40 no strict 'subs'; 41 104 42 use Encode; 105 43 106 44 BEGIN { 107 @ RecPlug::ISA = ('BasPlug');45 @DirectoryPlugin::ISA = ('AbstractPlugin'); 108 46 } 109 47 110 48 my $arguments = 111 49 [ { 'name' => "block_exp", 112 'desc' => "{Bas Plug.block_exp}",50 'desc' => "{BasePlugin.block_exp}", 113 51 'type' => "regexp", 114 52 'deft' => &get_default_block_exp(), … … 116 54 # this option has been deprecated. leave it here for now so we can warn people not to use it 117 55 { 'name' => "use_metadata_files", 118 'desc' => "{ RecPlug.use_metadata_files}",56 'desc' => "{DirectoryPlugin.use_metadata_files}", 119 57 'type' => "flag", 120 58 'reqd' => "no", 121 59 'hiddengli' => "yes" }, 122 60 { 'name' => "recheck_directories", 123 'desc' => "{ RecPlug.recheck_directories}",61 'desc' => "{DirectoryPlugin.recheck_directories}", 124 62 'type' => "flag", 125 63 'reqd' => "no" } ]; 126 64 127 my $options = { 'name' => " RecPlug",128 'desc' => "{ RecPlug.desc}",65 my $options = { 'name' => "DirectoryPlugin", 66 'desc' => "{DirectoryPlugin.desc}", 129 67 'abstract' => "no", 130 68 'inherits' => "yes", … … 136 74 push(@$pluginlist, $class); 137 75 138 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}139 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};140 141 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);76 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 77 push(@{$hashArgOptLists->{"OptList"}},$options); 78 79 my $self = new AbstractPlugin($pluginlist, $inputargs, $hashArgOptLists); 142 80 143 81 if ($self->{'info_only'}) { … … 148 86 # we have left this 
option in so we can warn people who are still using it 149 87 if ($self->{'use_metadata_files'}) { 150 die "ERROR: RecPlug-use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";88 die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n"; 151 89 } 152 90 … … 174 112 } 175 113 176 $self->SUPER::begin($pluginfo, $base_dir, $processor, $maxdocs); 177 } 178 179 114 #$self->SUPER::begin($pluginfo, $base_dir, $processor, $maxdocs); 115 } 116 117 sub end { 118 119 } 120 121 sub deinit { 122 123 } 180 124 # return 1 if this class might recurse using $pluginfo 181 125 sub is_recursive { … … 206 150 my $outhandle = $self->{'outhandle'}; 207 151 my $verbosity = $self->{'verbosity'}; 208 my $read_metadata_files = $self->{'use_metadata_files'}; 209 152 210 153 # Calculate the directory name and ensure it is a directory and 211 154 # that it is not explicitly blocked. … … 218 161 my $gsdlhome = quotemeta($ENV{'GSDLHOME'}); 219 162 if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/) { 220 print $outhandle " RecPlug: $dirname appears to be a reference to a Greenstone collection, skipping.\n";163 print $outhandle "DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n"; 221 164 return 0; 222 165 } … … 224 167 # check to see we haven't got a cyclic path... 225 168 if ($dirname =~ m%(/.*){,41}%) { 226 print $outhandle " RecPlug: $dirname is 40 directories deep, is this a recursive path? if not increase constant in RecPlug.pm.\n";169 print $outhandle "DirectoryPlugin: $dirname is 40 directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n"; 227 170 return 0; 228 171 } … … 230 173 # check to see we haven't got a cyclic path... 
231 174 if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) { 232 print $outhandle " RecPlug: $dirname appears to be in a recursive loop...\n";175 print $outhandle "DirectoryPlugin: $dirname appears to be in a recursive loop...\n"; 233 176 return 0; 234 177 } 235 178 236 179 if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) { 237 print $outhandle " RecPlug: metadata passed in: ",180 print $outhandle "DirectoryPlugin: metadata passed in: ", 238 181 join(", ", keys %$in_metadata), "\n"; 239 182 } … … 243 186 my $count = 0; 244 187 245 print $outhandle " RecPlug: getting directory $dirname\n" if ($verbosity);188 print $outhandle "DirectoryPlugin: getting directory $dirname\n" if ($verbosity); 246 189 247 190 # find all the files in the directory … … 250 193 print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n"; 251 194 } 252 print $outhandle " RecPlug: WARNING - couldn't read directory $dirname\n";195 print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n"; 253 196 return -1; # error in processing 254 197 } … … 298 241 299 242 # Recursively read each $subfile 300 print $outhandle " RecPlugmetadata recurring: $subfile\n" if ($verbosity > 2);243 print $outhandle "DirectoryPlugin metadata recurring: $subfile\n" if ($verbosity > 2); 301 244 302 245 $count += &plugin::metadata_read ($pluginfo, $this_file_base_dir, … … 389 332 if (!defined ($linkdest)) { 390 333 # system error - file not found? 
391 warn " RecPlug: symlink problem - $!";334 warn "DirectoryPlugin: symlink problem - $!"; 392 335 } else { 393 336 # see if link points to current or a parent directory 394 337 if ($linkdest =~ m@^[\./\\]+$@ || 395 338 index($dirname, $linkdest) != -1) { 396 warn " RecPlug: Ignoring recursive symlink ($dirname/$subfile -> $linkdest)\n";339 warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$subfile -> $linkdest)\n"; 397 340 next; 398 341 ; … … 401 344 } 402 345 403 print $outhandle " RecPlug: preparing metadata for $subfile\n" if ($verbosity > 2);346 print $outhandle "DirectoryPlugin: preparing metadata for $subfile\n" if ($verbosity > 2); 404 347 405 348 # Make a copy of $in_metadata to pass to $subfile … … 443 386 444 387 # Recursively read each $subfile 445 print $outhandle " RecPlugrecurring: $subfile\n" if ($verbosity > 2);388 print $outhandle "DirectoryPlugin recurring: $subfile\n" if ($verbosity > 2); 446 389 447 390 $count += &plugin::read ($pluginfo, $this_file_base_dir,
Note:
See TracChangeset
for help on using the changeset viewer.