Ignore:
Timestamp:
2018-03-13T15:59:21+13:00 (6 years ago)
Author:
ak19
Message:

Incremental building was not actually incremental when no metadata was assigned to any of the files (as happens with our docs for quick test collections). A default metadata.xml is present, but it is 'empty' in that it contains no FileSet elements with metadata elements assigned to FileName elements. We still want incremental behaviour in this case. The idea was to write out an entry into archiveinf-src.db for each metadata.xml processed, not just for each metadata.xml file actually referencing a doc, as BasePlugout had been doing so far on a per-doc basis. Kathy came up with the actual infrastructure that makes this work (ensuring all the necessary objects are available); Dr Bainbridge approved it, and it has now been added to the code.

Location:
main/trunk/greenstone2/perllib
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/MetadataXMLPlugin.pm

    r31492 r32159  
    252252    $self->{'metadata-file'} = $file;
    253253    $self->{'metadata-filename'} = $filename;
    254    
     254   
    255255    my $outhandle = $self->{'outhandle'};
    256256   
    257257    print STDERR "\n<Processing n='$file' p='MetadataXMLPlugin'>\n" if ($gli);
    258258    print $outhandle "MetadataXMLPlugin: processing $file\n" if ($self->{'verbosity'})> 1;
     259
     260    # In order to prevent blind reprocessing of the same old docs upon *incremental* building
     261    # whenever we encounter a default empty metadata.xml that has no content defined (attaches
     262    # no meta), we write an entry for *each* metadata.xml into archiveinf-src.db
     263    print $outhandle "MetadataXMLPlugin: writing an entry for this metadata.xml into archiveinf-src.db\n" if ($self->{'verbosity'})> 1;
     264    $processor->add_metaxml_file_entry_to_archiveinfsrc($filename); # pass in the full filename, like BasePlugout::archiveinf_db() does
     265
     266   
    259267    # add the file to the block list so that it won't be processed in read, as we will do all we can with it here
    260268    $self->block_raw_filename($block_hash,$filename);
     
    277285    return -1; #error
    278286    }
    279 
     287   
    280288    return 1;
    281289
  • main/trunk/greenstone2/perllib/plugouts/BasePlugout.pm

    r29163 r32159  
    10011001    } 
    10021002
    1003     # meta files not set in reverese entry, but need to set the metadata flag
     1003    # meta files not set in reverse entry, but need to set the metadata flag
    10041004    if (defined $doc_obj->get_meta_files()) {
    10051005    foreach my $meta_file_rec(@{$doc_obj->get_meta_files()}) {
     
    10081008    }
    10091009    }
     1010}
     1011
     1012# This sub is called for every metadata.xml accepted for processing by MetadataXMLPlugin
     1013# and adds an entry into archiveinf-src.db for that file in the form:
     1014#   [@THISCOLLECTPATH@/import/metadata.xml]
     1015#   <meta-file>1
     1016# This prevents blind reprocessing of the same old docs upon *incremental* building whenever
     1017# we encounter a default empty metadata.xml that has no actual <FileSet> content defined.
     1018sub add_metaxml_file_entry_to_archiveinfsrc {
     1019    my $self = shift (@_);
     1020    my ($full_file) = @_;
     1021   
     1022    print STDERR "**** Adding metaxml file entry for full_file: $full_file\n";
     1023    my $working_info = $self->{'output_info'};
     1024    $working_info->set_meta_file_flag($full_file);
    10101025}
    10111026
Note: See TracChangeset for help on using the changeset viewer.