Changeset 19493


Ignore:
Timestamp:
05/18/09 10:57:05 (12 years ago)
Author:
davidb
Message:

Introduce a new extrametafile structure to track which metadata.xml file each piece of metadata came from

Location:
gsdl/trunk/perllib/plugins
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/ArchivesInfPlugin.pm

    r18659 r19493  
    169169sub metadata_read {
    170170    my $self = shift (@_);
    171     my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
     171    my ($pluginfo, $base_dir, $file, $block_hash,
     172    $extrametakeys, $extrametadata, $extrametafile,
     173    $processor, $maxdocs, $gli) = @_;
    172174
    173175    return undef;
  • gsdl/trunk/perllib/plugins/BasePlugin.pm

    r19222 r19493  
    906906sub metadata_read {
    907907    my $self = shift (@_);
    908     my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
     908    my ($pluginfo, $base_dir, $file, $block_hash,
     909    $extrametakeys, $extrametadata, $extrametafile,
     910    $processor, $maxdocs, $gli) = @_;
    909911   
    910912    # can we process this file??
  • gsdl/trunk/perllib/plugins/DSpacePlugin.pm

    r17300 r19493  
    240240sub metadata_read {
    241241    my $self = shift (@_);
    242     my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
     242    my ($pluginfo, $base_dir, $file, $block_hash,
     243    $extrametakeys, $extrametadata, $extrametafile,
     244    $processor, $maxdocs, $gli) = @_;
    243245
    244246    my $only_first_doc = $self->{'only_first_doc'};
  • gsdl/trunk/perllib/plugins/DirectoryPlugin.pm

    r19178 r19493  
    326326sub metadata_read {
    327327    my $self = shift (@_);
    328     my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
     328    my ($pluginfo, $base_dir, $file, $block_hash,
     329    $extrametakeys, $extrametadata, $extrametafile,
     330    $processor, $maxdocs, $gli) = @_;
    329331
    330332    return undef;
     
    395397    my $additionalmetadata = 0;      # is there extra metadata available?
    396398    my %extrametadata;               # maps from filespec to extra metadata keys
     399    my %extrametafile;               # maps from filespec to the metadata.xml (or similar) file it came from
    397400    my @extrametakeys;               # keys of %extrametadata in order read
    398401
     
    411414        my $extrakeys_re  = $ek->{'re'};
    412415        my $extrakeys_md  = $ek->{'md'};
     416        my $extrakeys_mf  = $ek->{'mf'};
    413417        push(@extrametakeys,$extrakeys_re);
    414418        $extrametadata{$extrakeys_re} = $extrakeys_md;
     419        $extrametafile{$extrakeys_re} = $extrakeys_mf;
    415420    }
    416421    delete($self->{'subdir_extrametakeys'}->{$local_dirname});
     
    437442                      $file_subfile,$block_hash,
    438443                      \@extrametakeys, \%extrametadata,
     444                      \%extrametafile,
    439445                      $processor, $maxdocs, $gli);
    440446    $additionalmetadata = 1;
     
    455461    if ($ek_non_re =~ m/$dirsep_re/) { # specifies at least one directory
    456462        my $md = $extrametadata{$ek};
     463        my $mf = $extrametafile{$ek};
    457464
    458465        my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};
    459466
    460         my $subdir_rec = { 're' => $subdir_re, 'md' => $md };
     467        my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf };
    461468
    462469        # when it's looked up, it must be relative to the base dir
     
    562569    # Next add metadata read in XML files (if it is supplied)
    563570    if ($additionalmetadata == 1) {
    564         my ($filespec, $mdref);
    565         foreach $filespec (@extrametakeys) {
     571        foreach my $filespec (@extrametakeys) {
    566572        ## use the utf8 encoded filename to do the filename comparison
    567573        if ($tmpfile =~ /^$filespec$/) {
    568574            print $outhandle "File \"$subfile\" matches filespec \"$filespec\"\n"
    569575            if ($verbosity > 2);
    570             $mdref = $extrametadata{$filespec};
     576            my $mdref = $extrametadata{$filespec};
     577            my $mfref = $extrametafile{$filespec};
     578
     579            # Add the list of files that the metadata came from
     580            # into the metadata table so we can track this.
     581            # This mechanism is similar to how gsdlassocfile works.
     582
     583            my @metafile_pair = ();
     584            foreach my $l (keys %$mfref) {
     585            my $f = $mfref->{$l};
     586            push (@metafile_pair, "$f:$l");
     587            }
     588
     589            $mdref->{'gsdlmetafile'} = \@metafile_pair;
     590
    571591            &metadatautil::combine_metadata_structures($out_metadata, $mdref);
    572592        }
     
    583603
    584604
    585         my $inf_timestamp = $self->{'inf_timestamp'};
    586 
    587605        if (! -d $full_filename) {
    588             my $filename_timestamp = -M $full_filename;
    589             if ($filename_timestamp > $inf_timestamp) {
     606            if (!$block_hash->{'reindex_files'}->{$full_filename}) {
    590607            # filename has been around for longer than inf
    591608            print $outhandle "**** Skipping $subfile\n" if ($verbosity >3);
     
    602619            # mark to doc-oids that rely on it for re-indexing
    603620            ## &GDBMUtils::gdbmDatabase();
    604 
    605621            }
    606622        }
  • gsdl/trunk/perllib/plugins/LOMPlugin.pm

    r17300 r19493  
    118118sub metadata_read {
    119119    my $self = shift (@_);
    120     my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
     120    my ($pluginfo, $base_dir, $file, $block_hash,
     121    $extrametakeys, $extrametadata, $extrametafile,
     122    $processor, $maxdocs, $gli) = @_;
    121123
    122124    my $outhandle = $self->{'outhandle'};
  • gsdl/trunk/perllib/plugins/MetadataCSVPlugin.pm

    r17717 r19493  
    7979{
    8080    my $self = shift (@_);
    81     my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
     81    my ($pluginfo, $base_dir, $file, $block_hash,
     82    $extrametakeys, $extrametadata, $extrametafile,
     83    $processor, $maxdocs, $gli) = @_;
    8284
    8385    # Read metadata from CSV files
  • gsdl/trunk/perllib/plugins/MetadataEXIFPlugin.pm

    r19215 r19493  
    168168{
    169169  my $self = shift (@_);
    170   my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
     170  my ($pluginfo, $base_dir, $file, $block_hash,
     171      $extrametakeys, $extrametadata, $extrametafile,
     172      $processor, $maxdocs, $gli) = @_;
    171173 
    172174
  • gsdl/trunk/perllib/plugins/MetadataXMLPlugin.pm

    r17300 r19493  
    167167{
    168168    my $self = shift (@_);
    169     my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata,
    170 $processor, $maxdocs, $gli) = @_;
     169    my ($pluginfo, $base_dir, $file, $block_hash,
     170    $extrametakeys, $extrametadata,$extrametafile,
     171    $processor, $maxdocs, $gli) = @_;
    171172
    172173    my $filename = &util::filename_cat($base_dir, $file);
     
    174175        return undef;
    175176    }
     177
     178    $self->{'metadata-file'} = $file;
     179    $self->{'metadata-filename'} = $filename;
    176180
    177181    my $outhandle = $self->{'outhandle'};
     
    182186    $block_hash->{'file_blocks'}->{$filename} = 1;
    183187    $self->{'metadataref'} = $extrametadata;
     188    $self->{'metafileref'} = $extrametafile;
    184189    $self->{'metakeysref'} = $extrametakeys;
    185190   
     
    236241        my $file_metadata = $self->{'metadataref'}->{$target};
    237242        my $saved_metadata = $self->{'saved_metadata'};
     243
    238244        if (!defined $file_metadata) {
    239245        $self->{'metadataref'}->{$target} = $saved_metadata;
     
    245251        &metadatautil::combine_metadata_structures($file_metadata,$saved_metadata);
    246252        }
     253
     254       
     255        # now record which metadata.xml file it came from
     256
     257        my $file = $self->{'metadata-file'};
     258        my $filename = $self->{'metadata-filename'};
     259
     260        if (!defined $self->{'metafileref'}->{$target}) {
     261        $self->{'metafileref'}->{$target} = {};
     262        }
     263
     264        $self->{'metafileref'}->{$target}->{$file} = $filename
    247265    }
    248266    }
  • gsdl/trunk/perllib/plugins/OAIPlugin.pm

    r19213 r19493  
    183183    my $self = shift (@_); 
    184184
    185     my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
     185    my ($pluginfo, $base_dir, $file, $block_hash,
     186    $extrametakeys, $extrametadata, $extrametafile,
     187    $processor, $maxdocs, $gli) = @_;
    186188
    187189    # can we process this file??
  • gsdl/trunk/perllib/plugins/PDFPlugin.pm

    r18406 r19493  
    148148        push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    149149        push(@$pagedimg_options, "-screenviewsize", "1000");
     150        push(@$pagedimg_options, "-enable_cache");
    150151    }
    151152    }
  • gsdl/trunk/perllib/plugins/SplitTextFile.pm

    r18327 r19493  
    118118sub metadata_read {
    119119    my $self = shift (@_); 
    120     my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
     120    my ($pluginfo, $base_dir, $file, $block_hash,
     121    $extrametakeys, $extrametadata, $extrametafile,
     122    $processor, $maxdocs, $gli) = @_;
    121123
    122124    # returns 1 if matches process_exp, and has done blocking in the meantime
     
    124126                          $block_hash,
    125127                          $extrametakeys,
    126                           $extrametadata, $processor,
    127                           $maxdocs, $gli);
     128                          $extrametadata,
     129                          $extrametafile,
     130                          $processor, $maxdocs, $gli);
    128131    my $split_matched = undef;
    129132
Note: See TracChangeset for help on using the changeset viewer.