Changeset 13188


Ignore:
Timestamp:
2006-10-30T14:00:16+13:00 (18 years ago)
Author:
kjdon
Message:

RecPlug no longer handles reading metadata.xml files; it is now purely there to recurse directories. It still calls metadata_read inside each directory, but this can be removed once we have a global metadata pass. MetadataXMLPlug now handles reading metadata.xml files.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/RecPlug.pm

    r12969 r13188  
    9797use plugin;
    9898use util;
     99use metadatautil;
    99100
    100101use File::Basename;
     
    104105BEGIN {
    105106    @RecPlug::ISA = ('BasPlug');
    106     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
    107 }
    108 
    109 use XMLParser;
     107}
    110108
    111109my $arguments =
     
    118116    'desc' => "{RecPlug.use_metadata_files}",
    119117    'type' => "flag",
    120     'reqd' => "no" },
     118    'reqd' => "no",
     119    'hiddengli' => "yes" },
    121120      { 'name' => "recheck_directories",
    122121    'desc' => "{RecPlug.recheck_directories}",
    123122    'type' => "flag",
    124123    'reqd' => "no" } ];
    125 
     124   
    126125my $options = { 'name'     => "RecPlug",
    127126        'desc'     => "{RecPlug.desc}",
     
    130129        'args'     => $arguments };
    131130
    132 
    133 my ($self);
    134 
    135131sub new {
    136132    my ($class) = shift (@_);
     
    141137    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    142138
    143     $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     139    my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     140   
     141    if ($self->{'info_only'}) {
     142    # don't worry about any options or initialisations etc
     143    return bless $self, $class;
     144    }
     145
     146    # we have left this option in so we can warn people who are still using it
    144147    if ($self->{'use_metadata_files'}) {
    145     # create XML::Parser object for parsing metadata.xml files
    146     my $parser = new XML::Parser('Style' => 'Stream',
    147                      'Handlers' => {'Char' => \&Char,
    148                             'Doctype' => \&Doctype
    149                             });
    150 
    151     $self->{'parser'} = $parser;
    152     $self->{'in_filename'} = 0;
    153     }
    154    
     148    die "ERROR: RecPlug -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
     149    }
     150       
    155151    $self->{'subdir_extrametakeys'} = {};
    156152
     
    265261    }
    266262
    267     # read XML metadata files (if supplied)
     263    # setup the metadata structures. we do a metadata_read pass to see if there is any additional metadata, then pass it to read
     264   
    268265    my $additionalmetadata = 0;      # is there extra metadata available?
    269266    my %extrametadata;               # maps from filespec to extra metadata keys
     
    287284    }
    288285    delete($self->{'subdir_extrametakeys'}->{$local_dirname});
    289     }
    290 
    291     if ($read_metadata_files) {
    292     #read the directory "metadata.xml" file
    293     my $metadatafile = &util::filename_cat ($dirname, 'metadata.xml');
    294     if (-e $metadatafile) {
    295         print $outhandle "RecPlug: found metadata in $metadatafile\n"
    296         if ($verbosity);
    297         $self->read_metadata_xml_file($metadatafile, \%extrametadata, \@extrametakeys);
    298         $additionalmetadata = 1;
    299     }
    300286    }
    301287   
     
    308294    last if ($maxdocs != -1 && $count >= $maxdocs);
    309295    next if ($subfile =~ m/^\.\.?$/);
    310     #next if ($read_metadata_files && $subfile =~ /metadata\.xml$/);
    311296
    312297    # Recursively read each $subfile
     
    377362    last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
    378363    next if ($subfile =~ /^\.\.?$/);
    379     next if ($read_metadata_files && $subfile =~ /metadata\.xml$/);
    380364
    381365    # Follow Windows shortcuts
     
    414398    # Make a copy of $in_metadata to pass to $subfile
    415399    $out_metadata = {};
    416     &combine_metadata_structures($out_metadata, $in_metadata);
     400    &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);
    417401
    418402    # Next add metadata read in XML files (if it is supplied)
     
    425409            if ($verbosity > 2);
    426410            $mdref = $extrametadata{$filespec};
    427             &combine_metadata_structures($out_metadata, $mdref);
     411            &metadatautil::combine_metadata_structures($out_metadata, $mdref);
    428412        }
    429413        }
     
    458442}
    459443
    460 
    461 
    462 # Read a manually-constructed metadata file and store the data
    463 # it contains in the $metadataref structure.
    464 #
    465 # (metadataref is a reference to a hash whose keys are filenames
    466 # and whose values are metadata hash structures.) 
    467 
    468 sub read_metadata_xml_file {
    469     my $self = shift(@_);
    470     my ($filename, $metadataref, $metakeysref) = @_;
    471     $self->{'metadataref'} = $metadataref;
    472     $self->{'metakeysref'} = $metakeysref;
    473    
    474     eval {
    475     $self->{'parser'}->parsefile($filename);
    476     };
    477 
    478     if ($@) {
    479     die "RecPlug: ERROR $filename is not a well formed metadata.xml file ($@)\n";
    480     }
    481 }
    482 
    483 sub Doctype {
    484     my ($expat, $name, $sysid, $pubid, $internal) = @_;
    485 
    486     # allow the short-lived and badly named "GreenstoneDirectoryMetadata" files
    487     # to be processed as well as the "DirectoryMetadata" files which should now
    488     # be created by import.pl
    489     die if ($name !~ /^(Greenstone)?DirectoryMetadata$/);
    490 }
    491 
    492 sub StartTag {
    493     my ($expat, $element) = @_;
    494 
    495     if ($element eq "FileSet") {
    496     $self->{'saved_targets'} = [];
    497     $self->{'saved_metadata'} = {};
    498     }
    499     elsif ($element eq "FileName") {
    500     $self->{'in_filename'} = 1;
    501     }
    502     elsif ($element eq "Metadata") {
    503     $self->{'metadata_name'} = $_{'name'};
    504     if ((defined $_{'mode'}) && ($_{'mode'} eq "accumulate")) {
    505         $self->{'metadata_accumulate'} = 1;
    506     } else {
    507         $self->{'metadata_accumulate'} = 0;
    508     }
    509     }
    510 }
    511 
    512 sub EndTag {
    513     my ($expat, $element) = @_;
    514 
    515     if ($element eq "FileSet") {
    516     push (@{$self->{'metakeysref'}}, @{$self->{'saved_targets'}});
    517     foreach my $target (@{$self->{'saved_targets'}}) {
    518         my $file_metadata = $self->{'metadataref'}->{$target};
    519         my $saved_metadata = $self->{'saved_metadata'};
    520         if (!defined $file_metadata) {
    521         $self->{'metadataref'}->{$target} = $saved_metadata;
    522         }
    523         else {
    524         $self->combine_metadata_structures($file_metadata,$saved_metadata);
    525         }
    526     }
    527     }
    528     elsif ($element eq "FileName") {
    529     $self->{'in_filename'} = 0;
    530     }
    531     elsif ($element eq "Metadata") {
    532     $self->{'metadata_name'} = "";
    533     }
    534 
    535 }
    536 
    537 sub store_saved_metadata
    538 {
    539     my $self = shift(@_);
    540     my ($mname,$mvalue,$md_accumulate) = @_;
    541 
    542     if (defined $self->{'saved_metadata'}->{$mname}) {
    543     if ($md_accumulate) {
    544         # accumulate mode - add value to existing value(s)
    545         if (ref ($self->{'saved_metadata'}->{$mname}) eq "ARRAY") {
    546         push (@{$self->{'saved_metadata'}->{$mname}}, $mvalue);
    547         } else {
    548         $self->{'saved_metadata'}->{$mname} =
    549             [$self->{'saved_metadata'}->{$mname}, $mvalue];
    550         }
    551     } else {
    552         # override mode
    553         $self->{'saved_metadata'}->{$mname} = $mvalue;
    554     }
    555     } else {
    556     if ($md_accumulate) {
    557         # accumulate mode - add value into (currently empty) array
    558         $self->{'saved_metadata'}->{$mname} = [$mvalue];
    559     } else {
    560         # override mode
    561         $self->{'saved_metadata'}->{$mname} = $mvalue;
    562     }
    563     }
    564 }
    565 
    566 
    567 sub Text {
    568 
    569     if ($self->{'in_filename'}) {
    570     # $_ == FileName content
    571     push (@{$self->{'saved_targets'}}, $_);
    572     }
    573     elsif (defined ($self->{'metadata_name'}) && $self->{'metadata_name'} ne "") {
    574     # $_ == Metadata content
    575     my $mname = $self->{'metadata_name'};
    576     my $mvalue = $_;
    577     my $md_accumulate = $self->{'metadata_accumulate'};
    578     $self->store_saved_metadata($mname,$mvalue,$md_accumulate);
    579     }
    580 }
    581 
    582 # This Char function overrides the one in XML::Parser::Stream to overcome a
    583 # problem where $expat->{Text} is treated as the return value, slowing
    584 # things down significantly in some cases.
    585 sub Char {
    586     use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+
    587     $_[0]->{'Text'} .= $_[1];
    588     return undef;
    589 }
    590 
    591 # Combine two metadata structures.  Given two references to metadata
    592 # element structures, add every field of the second ($mdref2) to the first
    593 # ($mdref1). 
    594 #
    595 # Afterwards $mdref1 will be updated, and $mdref2 will be unchanged.
    596 #
    597 # We have to be careful about the way we merge metadata when one metadata
    598 # structure is in "override" mode and one is in "merge" mode.  In fact, we
    599 # use the mode from the second structure, $mdref2, because it is generally
    600 # defined later (lower in the directory structure) and is therefore more
    601 # "local" to the document concerned.
    602 #
    603 # Another issue is the use of references to pass metadata around.  If we
    604 # simply copy one metadata structure reference to another, then we're
    605 # effectively just copying a pointer, and changes to the new reference
    606 # will affect the old (copied) one also.  This also applies to ARRAY
    607 # references used as metadata element values (hence the "clonedata"
    608 # function below).
    609 
    610 sub combine_metadata_structures {
    611     my ($mdref1, $mdref2) = @_;
    612     my ($key, $value1, $value2);
    613 
    614     foreach $key (keys %$mdref2) {
    615 
    616     $value1 = $mdref1->{$key};
    617     $value2 = $mdref2->{$key};
    618    
    619     # If there is no existing value for this metadata field in
    620     # $mdref1, we simply copy the value from $mdref2 over.
    621     if (!defined $value1) {
    622         $mdref1->{$key} = &clonedata($value2);
    623     }
    624     # Otherwise we have to add the new values to the existing ones.
    625     # If the second structure is accumulated, then accumulate all the
    626     # values into the first structure
    627     elsif ((ref $value2) eq "ARRAY") {
    628         # If the first metadata element is a scalar we have to
    629         # convert it into an array before we add anything more.
    630         if ((ref $value1) ne 'ARRAY') {
    631         $mdref1->{$key} = [$value1];
    632         $value1 = $mdref1->{$key};
    633         }
    634         # Now add the value(s) from the second array to the first
    635         $value2 = &clonedata($value2);
    636         push @$value1, @$value2;
    637     }
    638     # Finally, if the second structure is not an array reference, we
    639     # know it is in override mode, so override the first structure.
    640     else {
    641         $mdref1->{$key} = &clonedata($value2);
    642     }
    643     }
    644 }
    645 
    646 
    647 # Make a "cloned" copy of a metadata value. 
    648 # This is trivial for a simple scalar value,
    649 # but not for an array reference.
    650 
    651 sub clonedata {
    652     my ($value) = @_;
    653     my $result;
    654 
    655     if ((ref $value) eq 'ARRAY') {
    656     $result = [];
    657     foreach my $item (@$value) {
    658         push @$result, $item;
    659     }
    660     } else {
    661     $result = $value;
    662     }
    663     return $result;
    664 }
    665 
    666 
    6674441;
Note: See TracChangeset for help on using the changeset viewer.