Ignore:
Timestamp:
2009-02-01T14:35:22+13:00 (15 years ago)
Author:
davidb
Message:

Modifications for incremental building to support files that need to be deleted

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/bin/script/import.pl

    r17751 r18440  
    287287    # other vars
    288288    my ($configfilename, $collection, $collectcfg,
    289     $archive_info_filename, $archive_info,
     289    $arcinfo_doc_filename, $arcinfo_src_filename, $archive_info,
    290290    $gs_mode,
    291291    $processor, $pluginfo);
     
    518518
    519519    # read the archive information file
    520     $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf");
     520##  $arcinfo_doc_filename = &util::filename_cat ($archivedir, "archives.inf");
     521
     522    my $db_ext = &util::is_little_endian() ? ".ldb" : ".bdb";
     523    my $doc_db = "archiveinf-doc$db_ext";
     524    my $src_db = "archiveinf-src$db_ext";
     525    $arcinfo_doc_filename = &util::filename_cat ($archivedir, $doc_db);
     526    $arcinfo_src_filename = &util::filename_cat ($archivedir, $src_db);
    521527   
    522528    $archive_info = new arcinfo ();
    523     $archive_info->load_info ($archive_info_filename);
     529    $archive_info->load_info ($arcinfo_doc_filename);
    524530    if ($reversesort) {
    525531    $archive_info->reverse_sort();
     532    }
     533
     534    if ($manifest eq "") {
     535    # Load in list of files in import folder from last import (if present)
     536    $archive_info->load_import_filelist ($arcinfo_src_filename);
    526537    }
    527538
     
    560571    # global blocking pass may set up some metadata
    561572    &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli);
     573    # Can now work out which files were deleted
     574
     575    # First convert all files to absolute form
     576    # This is to support the situation where the import folder is not
     577    # the default
     578
     579    my $prev_all_files = $archive_info->{'import_filelist'};
     580    foreach my $prev_file (keys %$prev_all_files) {
     581
     582        if (!&util::filename_is_absolute($prev_file)) {
     583        my $full_prev_file = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$prev_file);
     584        delete $prev_all_files->{$prev_file};
     585        $prev_all_files->{$full_prev_file} = 1;
     586        }
     587    }
     588
     589    # Figure out which are the new files, existing files and so
     590    # by implication the files from the previous import that are not
     591    # there any more => mark them for deletion
     592    foreach my $curr_file (keys %{$block_hash->{'all_files'}}) {
     593
     594        my $full_curr_file = $curr_file;
     595
     596        if (!&util::filename_is_absolute($curr_file)) {
     597        # add in import dir to make absolute
     598        $full_curr_file = &util::filename_cat($importdir,$curr_file);
     599        }
     600
     601##      print STDERR "**** Checking $curr_file\n";
     602
     603        # figure out if new file or not
     604        if (defined $prev_all_files->{$full_curr_file}) {
     605        # had it before
     606        $block_hash->{'existing_files'}->{$curr_file} = 1;
     607        # Now remove it, so by end of loop only the files
     608        # that need deleting are left
     609
     610        delete $prev_all_files->{$full_curr_file}
     611        }
     612        else {
     613        $block_hash->{'new_files'}->{$curr_file} = 1;
     614        }
     615
     616        delete $block_hash->{'all_files'}->{$curr_file};
     617    }
     618
     619    print STDERR "Delete files:\n  ";
     620
     621    my @delete_files = keys %$prev_all_files;
     622    print STDERR join("\n  ",@delete_files), "\n";
     623
    562624    &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
    563625    }
     
    565627    {
    566628    # process any files marked for importing
    567     foreach my $file (keys %{$manifest_lookup->{'import'}}) {
     629    foreach my $file (keys %{$manifest_lookup->{'index'}}) {
    568630        &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli);
    569631    }
     
    571633    # record files marked for deletion in arcinfo
    572634    foreach my $file (keys %{$manifest_lookup->{'delete'}}) {
    573         # consider finding it?
     635        # use 'archiveinf-src' GDBM file to look up all the OIDs
     636        # this file is used in (note in most cases, it's just one OID)
     637
     638        # An improvement would be to have the record read
     639        # into a hash array
     640        # gdbmRecordToHash
     641
     642        my $gdbm_val
     643        = &GDBMUtil::gdbmDatabaseGet($arcinfo_src_filename,$file);
     644
     645        my @oids = ($gdbm_val =~ m/^<oid>(.*)$/gm);
     646        foreach my $oid (@oids) {
     647
     648        # find out if it's an assoc file or main doc
     649
     650        # archiveinf-doc, lookup $oid
     651        # if "doc-file"
     652        #   mark it for deletion
     653
     654        # else (assoc file)
     655        #  mark all for re-indexing
     656
     657        # Now delete file
     658        }
     659
    574660        # $archive_info->add_info($OID,$doc_xml_file,"D");
    575661    }
     
    585671
    586672# The following 'if' statement is in the export.pl version of the script,
    587 # but not (so far) the import.pl version.  Why is this?
    588 ##    if ($saveas =~ m/^.*METS$/) {
     673# The reason for the 'if' statement is now given in export.pl
     674# Unclear at this point if the same should be done here
     675##    if (($saveas =~ m/^.*METS$/) || ($saveas eq "MARC")) {
     676    # Not all export types need this (e.g. DSpace)
     677
    589678    # should we still do this in debug mode??
    590679
    591     $archive_info->save_info($archive_info_filename);
     680    # for backwards compatibility with archives.inf file
     681    if ($arcinfo_doc_filename =~ m/\.inf$/) {
     682    $archive_info->save_info($arcinfo_doc_filename);
     683    }
     684
    592685##    }
    593686   
Note: See TracChangeset for help on using the changeset viewer.