Changeset 13188
- Timestamp:
- 2006-10-30T14:00:16+13:00 (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/RecPlug.pm
r12969 r13188 97 97 use plugin; 98 98 use util; 99 use metadatautil; 99 100 100 101 use File::Basename; … … 104 105 BEGIN { 105 106 @RecPlug::ISA = ('BasPlug'); 106 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan"); 107 } 108 109 use XMLParser; 107 } 110 108 111 109 my $arguments = … … 118 116 'desc' => "{RecPlug.use_metadata_files}", 119 117 'type' => "flag", 120 'reqd' => "no" }, 118 'reqd' => "no", 119 'hiddengli' => "yes" }, 121 120 { 'name' => "recheck_directories", 122 121 'desc' => "{RecPlug.recheck_directories}", 123 122 'type' => "flag", 124 123 'reqd' => "no" } ]; 125 124 126 125 my $options = { 'name' => "RecPlug", 127 126 'desc' => "{RecPlug.desc}", … … 130 129 'args' => $arguments }; 131 130 132 133 my ($self);134 135 131 sub new { 136 132 my ($class) = shift (@_); … … 141 137 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 142 138 143 $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 139 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 140 141 if ($self->{'info_only'}) { 142 # don't worry about any options or initialisations etc 143 return bless $self, $class; 144 } 145 146 # we have left this option in so we can warn people who are still using it 144 147 if ($self->{'use_metadata_files'}) { 145 # create XML::Parser object for parsing metadata.xml files 146 my $parser = new XML::Parser('Style' => 'Stream', 147 'Handlers' => {'Char' => \&Char, 148 'Doctype' => \&Doctype 149 }); 150 151 $self->{'parser'} = $parser; 152 $self->{'in_filename'} = 0; 153 } 154 148 die "ERROR: RecPlug -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n"; 149 } 150 155 151 $self->{'subdir_extrametakeys'} = {}; 156 152 … … 265 261 } 266 262 267 # read XML metadata files (if supplied) 263 # setup the metadata structures. we do a metadata_read pass to see if there is any additional metadata, then pass it to read 264 268 265 my $additionalmetadata = 0; # is there extra metadata available? 269 266 my %extrametadata; # maps from filespec to extra metadata keys … … 287 284 } 288 285 delete($self->{'subdir_extrametakeys'}->{$local_dirname}); 289 }290 291 if ($read_metadata_files) {292 #read the directory "metadata.xml" file293 my $metadatafile = &util::filename_cat ($dirname, 'metadata.xml');294 if (-e $metadatafile) {295 print $outhandle "RecPlug: found metadata in $metadatafile\n"296 if ($verbosity);297 $self->read_metadata_xml_file($metadatafile, \%extrametadata, \@extrametakeys);298 $additionalmetadata = 1;299 }300 286 } 301 287 … … 308 294 last if ($maxdocs != -1 && $count >= $maxdocs); 309 295 next if ($subfile =~ m/^\.\.?$/); 310 #next if ($read_metadata_files && $subfile =~ /metadata\.xml$/);311 296 312 297 # Recursively read each $subfile … … 377 362 last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs); 378 363 next if ($subfile =~ /^\.\.?$/); 379 next if ($read_metadata_files && $subfile =~ /metadata\.xml$/);380 364 381 365 # Follow Windows shortcuts … … 414 398 # Make a copy of $in_metadata to pass to $subfile 415 399 $out_metadata = {}; 416 & combine_metadata_structures($out_metadata, $in_metadata);400 &metadatautil::combine_metadata_structures($out_metadata, $in_metadata); 417 401 418 402 # Next add metadata read in XML files (if it is supplied) … … 425 409 if ($verbosity > 2); 426 410 $mdref = $extrametadata{$filespec}; 427 & combine_metadata_structures($out_metadata, $mdref);411 &metadatautil::combine_metadata_structures($out_metadata, $mdref); 428 412 } 429 413 } … … 458 442 } 459 443 460 461 462 # Read a manually-constructed metadata file and store the data463 # it contains in the $metadataref structure.464 #465 # (metadataref is a reference to a hash whose keys are filenames466 # and whose values are metadata hash structures.)467 468 sub read_metadata_xml_file {469 my $self = shift(@_);470 my ($filename, $metadataref, $metakeysref) = @_;471 $self->{'metadataref'} = $metadataref;472 $self->{'metakeysref'} = $metakeysref;473 474 eval {475 $self->{'parser'}->parsefile($filename);476 };477 478 if ($@) {479 die "RecPlug: ERROR $filename is not a well formed metadata.xml file ($@)\n";480 }481 }482 483 sub Doctype {484 my ($expat, $name, $sysid, $pubid, $internal) = @_;485 486 # allow the short-lived and badly named "GreenstoneDirectoryMetadata" files487 # to be processed as well as the "DirectoryMetadata" files which should now488 # be created by import.pl489 die if ($name !~ /^(Greenstone)?DirectoryMetadata$/);490 }491 492 sub StartTag {493 my ($expat, $element) = @_;494 495 if ($element eq "FileSet") {496 $self->{'saved_targets'} = [];497 $self->{'saved_metadata'} = {};498 }499 elsif ($element eq "FileName") {500 $self->{'in_filename'} = 1;501 }502 elsif ($element eq "Metadata") {503 $self->{'metadata_name'} = $_{'name'};504 if ((defined $_{'mode'}) && ($_{'mode'} eq "accumulate")) {505 $self->{'metadata_accumulate'} = 1;506 } else {507 $self->{'metadata_accumulate'} = 0;508 }509 }510 }511 512 sub EndTag {513 my ($expat, $element) = @_;514 515 if ($element eq "FileSet") {516 push (@{$self->{'metakeysref'}}, @{$self->{'saved_targets'}});517 foreach my $target (@{$self->{'saved_targets'}}) {518 my $file_metadata = $self->{'metadataref'}->{$target};519 my $saved_metadata = $self->{'saved_metadata'};520 if (!defined $file_metadata) {521 $self->{'metadataref'}->{$target} = $saved_metadata;522 }523 else {524 $self->combine_metadata_structures($file_metadata,$saved_metadata);525 }526 }527 }528 elsif ($element eq "FileName") {529 $self->{'in_filename'} = 0;530 }531 elsif ($element eq "Metadata") {532 $self->{'metadata_name'} = "";533 }534 535 }536 537 sub store_saved_metadata538 {539 my $self = shift(@_);540 my ($mname,$mvalue,$md_accumulate) = @_;541 542 if (defined $self->{'saved_metadata'}->{$mname}) {543 if ($md_accumulate) {544 # accumulate mode - add value to existing value(s)545 if (ref ($self->{'saved_metadata'}->{$mname}) eq "ARRAY") {546 push (@{$self->{'saved_metadata'}->{$mname}}, $mvalue);547 } else {548 $self->{'saved_metadata'}->{$mname} =549 [$self->{'saved_metadata'}->{$mname}, $mvalue];550 }551 } else {552 # override mode553 $self->{'saved_metadata'}->{$mname} = $mvalue;554 }555 } else {556 if ($md_accumulate) {557 # accumulate mode - add value into (currently empty) array558 $self->{'saved_metadata'}->{$mname} = [$mvalue];559 } else {560 # override mode561 $self->{'saved_metadata'}->{$mname} = $mvalue;562 }563 }564 }565 566 567 sub Text {568 569 if ($self->{'in_filename'}) {570 # $_ == FileName content571 push (@{$self->{'saved_targets'}}, $_);572 }573 elsif (defined ($self->{'metadata_name'}) && $self->{'metadata_name'} ne "") {574 # $_ == Metadata content575 my $mname = $self->{'metadata_name'};576 my $mvalue = $_;577 my $md_accumulate = $self->{'metadata_accumulate'};578 $self->store_saved_metadata($mname,$mvalue,$md_accumulate);579 }580 }581 582 # This Char function overrides the one in XML::Parser::Stream to overcome a583 # problem where $expat->{Text} is treated as the return value, slowing584 # things down significantly in some cases.585 sub Char {586 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+587 $_[0]->{'Text'} .= $_[1];588 return undef;589 }590 591 # Combine two metadata structures. Given two references to metadata592 # element structures, add every field of the second ($mdref2) to the first593 # ($mdref1).594 #595 # Afterwards $mdref1 will be updated, and $mdref2 will be unchanged.596 #597 # We have to be careful about the way we merge metadata when one metadata598 # structure is in "override" mode and one is in "merge" mode. In fact, we599 # use the mode from the second structure, $mdref2, because it is generally600 # defined later (lower in the directory structure) and is therefore more601 # "local" to the document concerned.602 #603 # Another issue is the use of references to pass metadata around. If we604 # simply copy one metadata structure reference to another, then we're605 # effectively just copyinga pointer, and changes to the new referene606 # will affect the old (copied) one also. This also applies to ARRAY607 # references used as metadata element values (hence the "clonedata"608 # function below).609 610 sub combine_metadata_structures {611 my ($mdref1, $mdref2) = @_;612 my ($key, $value1, $value2);613 614 foreach $key (keys %$mdref2) {615 616 $value1 = $mdref1->{$key};617 $value2 = $mdref2->{$key};618 619 # If there is no existing value for this metadata field in620 # $mdref1, so we simply copy the value from $mdref2 over.621 if (!defined $value1) {622 $mdref1->{$key} = &clonedata($value2);623 }624 # Otherwise we have to add the new values to the existing ones.625 # If the second structure is accumulated, then acculate all the626 # values into the first structure627 elsif ((ref $value2) eq "ARRAY") {628 # If the first metadata element is a scalar we have to629 # convert it into an array before we add anything more.630 if ((ref $value1) ne 'ARRAY') {631 $mdref1->{$key} = [$value1];632 $value1 = $mdref1->{$key};633 }634 # Now add the value(s) from the second array to the first635 $value2 = &clonedata($value2);636 push @$value1, @$value2;637 }638 # Finally, If the second structure is not an array erference, we639 # know it is in override mode, so override the first structure.640 else {641 $mdref1->{$key} = &clonedata($value2);642 }643 }644 }645 646 647 # Make a "cloned" copy of a metadata value.648 # This is trivial for a simple scalar value,649 # but not for an array reference.650 651 sub clonedata {652 my ($value) = @_;653 my $result;654 655 if ((ref $value) eq 'ARRAY') {656 $result = [];657 foreach my $item (@$value) {658 push @$result, $item;659 }660 } else {661 $result = $value;662 }663 return $result;664 }665 666 667 444 1;
Note:
See TracChangeset
for help on using the changeset viewer.