Changeset 16391
- Timestamp: 2008-07-14T14:56:43+12:00 (15 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/DirectoryPlugin.pm
r15870 r16391 88 88 die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n"; 89 89 } 90 90 91 91 $self->{'subdir_extrametakeys'} = {}; 92 92 … … 135 135 } 136 136 137 # return number of files processed, undef if can't process 138 # Note that $base_dir might be "" and that $file might 139 # include directories 140 141 # This function passes around metadata hash structures. Metadata hash 142 # structures are hashes that map from a (scalar) key (the metadata element 143 # name) to either a scalar metadata value or a reference to an array of 144 # such values. 145 146 sub read { 147 my $self = shift (@_); 148 my ($pluginfo, $base_dir, $file, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_; 149 137 sub check_directory_path { 138 139 my $self = shift(@_); 140 my ($dirname) = @_; 141 142 return undef unless (-d $dirname); 143 144 return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/); 145 150 146 my $outhandle = $self->{'outhandle'}; 151 my $verbosity = $self->{'verbosity'}; 152 153 # Calculate the directory name and ensure it is a directory and 154 # that it is not explicitly blocked. 155 my $dirname = $file; 156 $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 157 return undef unless (-d $dirname); 158 return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/); 159 147 160 148 # check to make sure we're not reading the archives or index directory 161 149 my $gsdlhome = quotemeta($ENV{'GSDLHOME'}); … … 176 164 return 0; 177 165 } 178 179 if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) { 180 print $outhandle "DirectoryPlugin: metadata passed in: ", 181 join(", ", keys %$in_metadata), "\n"; 182 } 183 184 # Recur over directory contents. 
166 167 return 1; 168 } 169 170 # this may be called more than once 171 sub sort_out_associated_files { 172 173 my $self = shift (@_); 174 my ($block_hash) = @_; 175 if (!scalar (keys %{$block_hash->{'shared_fileroot'}})) { 176 return; 177 } 178 179 $self->{'assocfile_info'} = {} unless defined $self->{'assocfile_info'}; 180 my $metadata = $self->{'assocfile_info'}; 181 foreach my $prefix (keys %{$block_hash->{'shared_fileroot'}}) { 182 my $record = $block_hash->{'shared_fileroot'}->{$prefix}; 183 184 my $tie_to = $record->{'tie_to'}; 185 my $exts = $record->{'exts'}; 186 187 if ((defined $tie_to) && (scalar (keys %$exts) > 0)) { 188 # set up fileblocks and assocfile_tobe 189 my $base_file = "$prefix$tie_to"; 190 $metadata->{$base_file} = {} unless defined $metadata->{$base_file}; 191 my $base_file_metadata = $metadata->{$base_file}; 192 193 $base_file_metadata->{'gsdlassocfile_tobe'} = [] unless defined $base_file_metadata->{'gsdlassocfile_tobe'}; 194 my $assoc_tobe = $base_file_metadata->{'gsdlassocfile_tobe'}; 195 foreach my $e (keys %$exts) { 196 # block the file 197 $block_hash->{'file_blocks'}->{"$prefix$e"} = 1; 198 # set up as an associatd file 199 print STDERR " $self->{'plugin_type'}: Associating $prefix$e with $tie_to version\n"; 200 my $mime_type = ""; # let system auto detect this 201 push(@$assoc_tobe,"$prefix$e:$mime_type:"); 202 203 } 204 } 205 } # foreach record 206 207 $block_hash->{'shared_fileroot'} = undef; 208 $block_hash->{'shared_fileroot'} = {}; 209 210 } 211 212 213 # do block exp OR special blocking ??? 
214 215 sub file_is_blocked { 216 my $self = shift (@_); 217 my ($block_hash, $filename_full_path) = @_; 218 219 if (defined $block_hash->{'file_blocks'}->{$filename_full_path}) { 220 $self->{'num_blocked'} ++; 221 return 1; 222 } 223 # check Directory plugin's own block_exp 224 if ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) { 225 $self->{'num_blocked'} ++; 226 return 1; # blocked 227 } 228 return 0; 229 } 230 231 232 233 sub file_block_read { 234 my $self = shift (@_); 235 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_; 236 237 my $outhandle = $self->{'outhandle'}; 238 my $verbosity = $self->{'verbosity'}; 239 240 # Calculate the directory name and ensure it is a directory and 241 # that it is not explicitly blocked. 242 my $dirname = $file; 243 $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 244 245 my $directory_ok = $self->check_directory_path($dirname); 246 return $directory_ok unless (defined $directory_ok && $directory_ok == 1); 247 248 $block_hash->{'file_blocks'} = {} unless defined $block_hash->{'file_blocks'}; 249 $block_hash->{'shared_fileroot'} = {} unless defined $block_hash->{'shared_fileroot'}; 250 251 # Recur over directory contents. 
185 252 my (@dir, $subfile); 186 my $count = 0;187 188 print $outhandle "DirectoryPlugin : getting directory $dirname\n" if ($verbosity);253 #my $count = 0; 254 255 print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2); 189 256 190 257 # find all the files in the directory … … 198 265 @dir = readdir (DIR); 199 266 closedir (DIR); 267 268 for (my $i = 0; $i < scalar(@dir); $i++) { 269 my $subfile = $dir[$i]; 270 my $this_file_base_dir = $base_dir; 271 next if ($subfile =~ m/^\.\.?$/); 272 273 # Recursively read each $subfile 274 print $outhandle "DirectoryPlugin block recurring: $subfile\n" if ($verbosity > 2); 275 276 #$count += &plugin::file_block_read ($pluginfo, $this_file_base_dir, 277 &plugin::file_block_read ($pluginfo, $this_file_base_dir, 278 &util::filename_cat($file, $subfile), 279 $block_hash, $metadata, $gli); 280 281 } 282 $self->sort_out_associated_files($block_hash); 283 #return $count; 284 285 } 286 # return number of files processed, undef if can't process 287 # Note that $base_dir might be "" and that $file might 288 # include directories 289 290 # This function passes around metadata hash structures. Metadata hash 291 # structures are hashes that map from a (scalar) key (the metadata element 292 # name) to either a scalar metadata value or a reference to an array of 293 # such values. 294 295 sub read { 296 my $self = shift (@_); 297 my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_; 298 299 my $outhandle = $self->{'outhandle'}; 300 my $verbosity = $self->{'verbosity'}; 301 302 # Calculate the directory name and ensure it is a directory and 303 # that it is not explicitly blocked. 
304 my $dirname; 305 if ($file eq "") { 306 $dirname = $base_dir; 307 } else { 308 $dirname = $file; 309 $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 310 } 311 312 my $directory_ok = $self->check_directory_path($dirname); 313 return $directory_ok unless (defined $directory_ok && $directory_ok == 1); 314 315 if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) { 316 print $outhandle "DirectoryPlugin: metadata passed in: ", 317 join(", ", keys %$in_metadata), "\n"; 318 } 319 320 321 # Recur over directory contents. 322 my (@dir, $subfile); 323 my $count = 0; 324 325 print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2); 326 327 # find all the files in the directory 328 if (!opendir (DIR, $dirname)) { 329 if ($gli) { 330 print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n"; 331 } 332 print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n"; 333 return -1; # error in processing 334 } 335 @dir = readdir (DIR); 336 closedir (DIR); 200 337 201 338 # Re-order the files in the list so any directories ending with .all are moved to the end … … 211 348 my %extrametadata; # maps from filespec to extra metadata keys 212 349 my @extrametakeys; # keys of %extrametadata in order read 350 213 351 214 352 my $os_dirsep = &util::get_os_dirsep(); … … 239 377 last if ($maxdocs != -1 && $count >= $maxdocs); 240 378 next if ($subfile =~ m/^\.\.?$/); 241 379 my $file_subfile = &util::filename_cat($file, $subfile); 380 my $full_filename = &util::filename_cat($this_file_base_dir, $file_subfile); 381 if ($self->file_is_blocked($block_hash,$full_filename)) { 382 print STDERR "DirectoryPlugin: file $full_filename was blocked for metadata_read\n" if ($verbosity > 2); 383 next; 384 } 385 242 386 # Recursively read each $subfile 243 387 print $outhandle "DirectoryPlugin metadata recurring: $subfile\n" if ($verbosity > 2); 244 388 245 389 $count += &plugin::metadata_read 
($pluginfo, $this_file_base_dir, 246 &util::filename_cat($file, $subfile),390 $file_subfile,$block_hash, 247 391 $out_metadata, \@extrametakeys, \%extrametadata, 248 392 $processor, $maxdocs, $gli); 249 393 $additionalmetadata = 1; 250 394 } 251 395 252 396 # filter out any extrametakeys that mention subdirectories and store 253 397 # for later use (i.e. when that sub-directory is being processed) … … 313 457 next if ($subfile =~ /^\.\.?$/); 314 458 459 my $file_subfile = &util::filename_cat($file, $subfile); 460 my $full_filename 461 = &util::filename_cat($this_file_base_dir,$file_subfile); 462 463 if ($self->file_is_blocked($block_hash,$full_filename)) { 464 print STDERR "DirectoryPlugin: file $full_filename was blocked for read\n" if ($verbosity > 2); 465 next; 466 } 467 315 468 # Follow Windows shortcuts 316 469 if ($subfile =~ /(?i)\.lnk$/ && $ENV{'GSDLOS'} =~ /^windows$/i) { … … 350 503 &metadatautil::combine_metadata_structures($out_metadata, $in_metadata); 351 504 505 # check the assocfile_info 506 if (defined $self->{'assocfile_info'}->{$full_filename}) { 507 &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$full_filename}); 508 } 352 509 ## encode the filename as perl5 doesn't handle unicode filenames 353 510 my $tmpfile = Encode::encode_utf8($subfile); … … 369 526 370 527 371 my $file_subfile = &util::filename_cat($file, $subfile);372 my $filename_subfile373 = &util::filename_cat($this_file_base_dir,$file_subfile);374 528 if (defined $self->{'inf_timestamp'}) { 375 529 my $inf_timestamp = $self->{'inf_timestamp'}; 376 530 377 if (! -d $f ilename_subfile) {378 my $filename_timestamp = -M $f ilename_subfile;531 if (! 
-d $full_filename) { 532 my $filename_timestamp = -M $full_filename; 379 533 if ($filename_timestamp > $inf_timestamp) { 380 534 # filename has been around for longer than inf … … 389 543 390 544 $count += &plugin::read ($pluginfo, $this_file_base_dir, 391 $file_subfile, 545 $file_subfile, $block_hash, 392 546 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli); 393 547 }
Note: See TracChangeset for help on using the changeset viewer.