Changeset 31900


Ignore:
Timestamp:
08/17/17 20:29:16 (3 years ago)
Author:
ak19
Message:

When the oai-inf.db for a collection is first created, oaiinfo.pm now writes out a new special record, 'earliesttimestamp', containing the timestamp at the moment the oai-inf.db is created. If the oai-inf.db already exists but does not yet contain this record, as with our demo collection on svn, the earliesttimestamp record is created with its timestamp set to the earliest lastmodified date found in the oai-inf.db.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/oaiinfo.pm

    r31723 r31900  
    66use constant INFO_DATESTAMP_INDEX => 2;
    77
     8my $OID_EARLIEST_TIMESTAMP = "earliesttimestamp";
     9  # Declaring as my $OID_EARLIEST_TIMESTAMP rather than constant, because it's not straightforward
     10  # to use string constant as hash key (need to concat with empty str).
     11  # http://perldoc.perl.org/constant.html
     12  # But beware of using perl 'constant' as hash key:
     13  # https://stackoverflow.com/questions/96848/is-there-any-way-to-use-a-constant-as-hash-key-in-perl
     14  # http://forums.devshed.com/perl-programming-6/massive-using-constants-hash-keys-603600.html
     15  # https://perlmaven.com/constants-and-read-only-variables-in-perl
     16  # http://neilb.org/reviews/constants.html - compares different ways to declare constants in perl
     17
    818use strict;
    919
     
    1626
    1727# File format read in: OID <tab> (Deletion-)Status <tab> Timestamp <tab> Datestamp
     28
     29# A special record of the db contains the timestamp of the creation of the oai-inf.db for
     30# the collection, representing the collection's earliest datetimestamp.
     31# This record has $OID_EARLIEST_TIMESTAMP for OID.
     32# Its deletion status is maintained at NA, not applicable.
     33# In cases of older oai-inf.db files where there's no earliesttimestamp record, this record
     34# is also created but with timestamp set to the oldest lastmodified date in oai-inf.db.
    1835
    1936# Deletion status can be:
     
    2138#  D = Doc with OID has been deleted. Timestamp indicates time of deletion
    2239#  PD = Provisionally Deleted. The associated timestamps are momentarily unaltered.
     40#  NA = Not Applicable. Only for the special record with $OID_EARLIEST_TIMESTAMP as OID.
    2341
    2442# oaidb is "always incremental": always reflects the I/B/R/D status of archive info db,
     
    187205
    188206#    print STDERR "@@@@@ oaidb: $self->{'oaidb_file_path'}\n" if $self->{'verbosity'} >= $self->{'verbosity_threshold'};
    189    
    190     return $do_pd_step;
     207
     208    return ($do_pd_step, $initdb);
    191209}
    192210
     
    200218    my ($removeold, $have_manifest) = @_;
    201219   
    202     my $do_pd_step = $self->init_tmpdb($removeold, $have_manifest);
    203       # returns 1 if the step to mark oaidb entries as PD is required
     220    my ($do_pd_step, $is_new_db) = $self->init_tmpdb($removeold, $have_manifest);
     221      # returns 1 for $do_pd_step if the step to mark oaidb entries as PD is required
    204222      # if we're doing full rebuilding and it's NOT the first time creating the oai_inf db,
    205223      # then the tasks to do with PD (provisionally deleted) OAI OIDs should be carried out
     224      # Returns 1 for is_new_db to allow further one time initialisation of the new oai-inf.db
    206225
    207226    $self->load_info();
    208227    $self->print_info(); # DEBUGGING
    209228
     229    # A special record of the oai-inf.db will contain the timestamp when the oai-inf.db was created.
     230    # This represents the collection's "earliest datetimestamp". It should remain unaltered
     231    # for as long as oai-inf db exists. This record has the special OID of $OID_EARLIEST_TIMESTAMP.
     232    # This record should not be marked as PD, but remain as NA, as it can't ever be deleted.
     233    # (The status field for the $OID_EARLIEST_TIMESTAMP record is actually meaningless anyway.)
     234    my $save_to_db = $self->insert_coll_earliest_timestamp($is_new_db);   
     235   
    210236    if ($do_pd_step) {
    211237    $self->mark_all_existing_as_provisionallydeleted();
    212238    $self->print_info(); # DEBUGGING
    213 
    214     # save to db file now that we're done
    215     $self->save_info();
     239   
     240    $save_to_db = 1;   
     241    }
     242
     243    if($save_to_db) {
     244    # save changes to $self->{'info'} out to db file, now that we're done
     245    $self->save_info();
    216246    }
    217247
     
    238268    # all the while ensuring all PDs are changed back to E for OIDs that exist in both arcinfo and oaiinfo db. 
    239269
    240     my $arcinfo_map = $archive_info->{'info'};
     270    my $arcinfo_map = $archive_info->{'info'};
    241271
    242272    foreach my $OID (keys %$arcinfo_map) {
     
    259289        # an oaicollection. But what if we always maintain an oaidb? Still call $self->index() here.
    260290    } else {
    261         if ($self->{'verbosity'} >= $self->{'verbosity_threshold'}) {
    262             print STDERR "### oaiinfo::building_stage_before_indexing(): Unrecognised indexing status $indexing_status\n";
    263         }
     291        if ($self->{'verbosity'} >= $self->{'verbosity_threshold'}) {
     292        print STDERR "### oaiinfo::building_stage_before_indexing(): Unrecognised indexing status $indexing_status\n";
     293        }
    264294    }
    265295    }
     
    297327   
    298328    } else {
    299         if ($self->{'verbosity'} >= $self->{'verbosity_threshold'}) {
    300             print STDERR "@@@@@ In oaiinfo::activate_collection():\n";
    301             print STDERR "@@@@@   No tmpdb at $self->{'oaidb_tmp_filepath'}\n";
    302             print STDERR "@@@@@   to make 'live' by moving to $self->{'oaidb_live_filepath'}.\n";
    303         }
     329    if ($self->{'verbosity'} >= $self->{'verbosity_threshold'}) {
     330        print STDERR "@@@@@ In oaiinfo::activate_collection():\n";
     331        print STDERR "@@@@@   No tmpdb at $self->{'oaidb_tmp_filepath'}\n";
     332        print STDERR "@@@@@   to make 'live' by moving to $self->{'oaidb_live_filepath'}.\n";
     333    }
    304334    }
    305335}
     
    316346    print STDERR "@@@@@ oaiinfo::mark_all_E_as_PD(): Marking the E entries as PD\n" if $self->{'verbosity'} >= $self->{'verbosity_threshold'};
    317347
    318     my $infomap = $self->{'info'};
     348    my $infomap = $self->{'info'};
    319349
    320350    foreach my $OID (keys %$infomap) { # Mac Mountain Lion wants %$map, won't accept %$self->{'info'}
     
    334364    print STDERR "@@@@@ oaiinfo::mark_all_PD_as_D(): Marking the PD entries as D\n" if $self->{'verbosity'} >= $self->{'verbosity_threshold'};
    335365
    336     my $infomap = $self->{'info'};
     366    my $infomap = $self->{'info'};
    337367
    338368    foreach my $OID (keys %$infomap) {
     
    351381    my $self = shift (@_);
    352382
    353     if ($self->{'verbosity'} < $self->{'verbosity_threshold'}) {
    354         return;
    355     }   
     383    if ($self->{'verbosity'} < $self->{'verbosity_threshold'}) {
     384    return;
     385    }
    356386   
    357387    print STDERR "###########################################################\n";
    358388    print STDERR "@@@@@ oaiinfo::print_info(): oaidb in memory contains: \n";
    359389   
    360     my $infomap = $self->{'info'};
     390    my $infomap = $self->{'info'};
    361391
    362392    foreach my $OID (keys %$infomap) {
     
    369399
    370400    print STDERR "###########################################################\n";
     401}
     402
     403
     404# When a fresh oai-inf.db is created, this method is called to add the db's special
     405# record representing the collection's earliest timestamp.
     406# OID=$OID_EARLIEST_TIMESTAMP, deletion_status=NA for not applicable, and current timestamp/date.
     407# For older oai-inf.db's that don't yet have this record, a record will be added too,
     408# but with the timestamp set to the oldest last modified date for the collection's docs.
     409sub insert_coll_earliest_timestamp {
     410    my $self = shift (@_);
     411    my ($is_new_db) = @_;
     412
     413    my $current_time = $self->get_current_time();
     414    my $save_to_db = 0;
     415
     416   
     417    print STDERR "@@@@@ oaiinfo::insert_coll_earliest_timestamp(): " if $self->{'verbosity'} >= $self->{'verbosity_threshold'};
     418   
     419    if($is_new_db) {
     420   
     421    print STDERR "New db. Setting timestamp of oai-inf.db creation.\n" if $self->{'verbosity'} >= $self->{'verbosity_threshold'};
     422   
     423    $self->set_info($OID_EARLIEST_TIMESTAMP, "NA", $current_time);
     424    $save_to_db = 1;
     425    }
     426   
     427    else { # oai-inf.db already exists, ensure it has an [$OID_EARLIEST_TIMESTAMP] set
     428
     429    my $earliesttimestamp_record = $self->{'info'}->{$OID_EARLIEST_TIMESTAMP};
     430   
     431    if (!defined $earliesttimestamp_record) {
     432        # oai-inf.db exists, but doesn't contain an [$OID_EARLIEST_TIMESTAMP] record yet.
     433        # Let's create one for it:
     434        # Work out the earliest lastmodified datetime in the collection, by inspecting
     435        # the last modified timestamp for each doc in the collection
     436       
     437        my $earliest_timestamp = $current_time;
     438       
     439        my $infomap = $self->{'info'}; # Mac Mountain Lion wants %$map, won't accept %$self->{'info'}       
     440        foreach my $OID (keys %$infomap) {
     441        my $OID_info = $self->{'info'}->{$OID};
     442        my $lastmodified = $OID_info->[INFO_TIMESTAMP_INDEX];
     443        if($lastmodified < $earliest_timestamp) {
     444            $earliest_timestamp = $lastmodified;
     445        }
     446        }
     447       
     448        print STDERR "Collection timestamp not yet set for $OID_EARLIEST_TIMESTAMP. Setting to earliest found: $earliest_timestamp\n" if $self->{'verbosity'} >= $self->{'verbosity_threshold'};
     449       
     450        $self->set_info($OID_EARLIEST_TIMESTAMP, "NA", $earliest_timestamp);
     451        $save_to_db = 1;
     452    } else {
     453        print STDERR "Collection timestamp was already set\n" if $self->{'verbosity'} >= $self->{'verbosity_threshold'};
     454    }
     455    }
     456
     457    return $save_to_db;
    371458}
    372459
     
    422509
    423510    # the following method will set to current time if no timestamp provided,
    424     # But by explicit here, the code is easier to follow
     511    # But by being explicit here, the code is easier to follow
    425512    $self->set_info($OID, "D", $self->get_current_time());
    426513
     
    540627
    541628    # write out again. Open file for overwriting, not appending.
    542     # Then write out data structure $self->{'info'} that's been maintaining the data in-memory.
     629    # Then write out data structure $self->{'info'} that has been maintaining the data in-memory.
    543630    my $infodb_handle = &dbutil::open_infodb_write_handle($infodbtype, $filename);
    544631
    545     my $infomap = $self->{'info'};
     632    my $infomap = $self->{'info'};
    546633    foreach my $oid ( keys %$infomap ) {
    547634    my $OID_info = $self->{'info'}->{$oid};
     
    599686    my @list = ();
    600687   
    601     my $infomap = $self->{'info'};
     688    my $infomap = $self->{'info'};
    602689    foreach my $OID (keys %$infomap) { 
    603690    my $OID_info = $self->{'info'}->{$OID};
     
    617704sub size {
    618705    my $self = shift (@_);
    619     my $infomap = $self->{'info'};
     706    my $infomap = $self->{'info'};
    620707    return (scalar keys %$infomap);
    621708}
Note: See TracChangeset for help on using the changeset viewer.