Changeset 31900
- Timestamp: 2017-08-17T20:29:16+12:00 (6 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/oaiinfo.pm
r31723 r31900 6 6 use constant INFO_DATESTAMP_INDEX => 2; 7 7 8 my $OID_EARLIEST_TIMESTAMP = "earliesttimestamp"; 9 # Declaring as my $OID_EARLIEST_TIMESTAMP rather than constant, because it's not straightforward 10 # to use string constant as hash key (need to concat with empty str). 11 # http://perldoc.perl.org/constant.html 12 # But beware of using perl 'constant' as hash key: 13 # https://stackoverflow.com/questions/96848/is-there-any-way-to-use-a-constant-as-hash-key-in-perl 14 # http://forums.devshed.com/perl-programming-6/massive-using-constants-hash-keys-603600.html 15 # https://perlmaven.com/constants-and-read-only-variables-in-perl 16 # http://neilb.org/reviews/constants.html - compares different ways to declare constants in perl 17 8 18 use strict; 9 19 … … 16 26 17 27 # File format read in: OID <tab> (Deletion-)Status <tab> Timestamp <tab> Datestamp 28 29 # A special record of the db contains the timestamp of the creation of the oai-inf.db for 30 # the collection, representing the collection's earliest datetimestamp. 31 # This record has $OID_EARLIEST_TIMESTAMP for OID. 32 # Its deletion status is maintained at NA, not applicable. 33 # In cases of older oai-inf.db files where there's no earliesttimestamp field, this record 34 # is also created but with timestamp set to the oldest lastmodified date in oai-inf.db. 18 35 19 36 # Deletion status can be: … … 21 38 # D = Doc with OID has been deleted. Timestamp indicates time of deletion 22 39 # PD = Provisionally Deleted. The associated timestamps are momentarily unaltered. 40 # NA = Not Applicable. Only for the special record with $OID_EARLIEST_TIMESTAMP as OID. 
23 41 24 42 # oaidb is "always incremental": always reflects the I/B/R/D status of archive info db, … … 187 205 188 206 # print STDERR "@@@@@ oaidb: $self->{'oaidb_file_path'}\n" if $self->{'verbosity'} >= $self->{'verbosity_threshold'}; 189 190 return $do_pd_step;207 208 return ($do_pd_step, $initdb); 191 209 } 192 210 … … 200 218 my ($removeold, $have_manifest) = @_; 201 219 202 my $do_pd_step= $self->init_tmpdb($removeold, $have_manifest);203 # returns 1 if the step to mark oaidb entries as PD is required220 my ($do_pd_step, $is_new_db) = $self->init_tmpdb($removeold, $have_manifest); 221 # returns 1 for $do_pd_step if the step to mark oaidb entries as PD is required 204 222 # if we're doing full rebuilding and it's NOT the first time creating the oai_inf db, 205 223 # then the tasks to do with PD (provisionally deleted) OAI OIDs should be carried out 224 # Returns 1 for is_new_db to allow further one time initialisation of the new oai-inf.db 206 225 207 226 $self->load_info(); 208 227 $self->print_info(); # DEBUGGING 209 228 229 # A special record of the oai-inf.db will contain the timestamp when the oai-inf.db was created. 230 # This represents the collection's "earliest datetimestamp". It should remain unaltered 231 # for as long as oai-inf db exists. This record has the special OID of $OID_EARLIEST_TIMESTAMP. 232 # This record should not be marked as PD, but remain as E, as it can't ever be deleted. 233 # Although the status field for the $OID_EARLIEST_TIMESTAMP record is actually meaningless. 
234 my $save_to_db = $self->insert_coll_earliest_timestamp($is_new_db); 235 210 236 if ($do_pd_step) { 211 237 $self->mark_all_existing_as_provisionallydeleted(); 212 238 $self->print_info(); # DEBUGGING 213 214 # save to db file now that we're done 215 $self->save_info(); 239 240 $save_to_db = 1; 241 } 242 243 if($save_to_db) { 244 # save changes to $self->{'info'} out to db file, now that we're done 245 $self->save_info(); 216 246 } 217 247 … … 238 268 # all the while ensuring all PDs are changed back to E for OIDs that exist in both arcinfo and oaiinfo db. 239 269 240 270 my $arcinfo_map = $archive_info->{'info'}; 241 271 242 272 foreach my $OID (keys %$arcinfo_map) { … … 259 289 # an oaicollection. But what if we always maintain an oaidb? Still call $self->index() here. 260 290 } else { 261 262 263 291 if ($self->{'verbosity'} >= $self->{'verbosity_threshold'}) { 292 print STDERR "### oaiinfo::building_stage_before_indexing(): Unrecognised indexing status $indexing_status\n"; 293 } 264 294 } 265 295 } … … 297 327 298 328 } else { 299 300 301 302 303 329 if ($self->{'verbosity'} >= $self->{'verbosity_threshold'}) { 330 print STDERR "@@@@@ In oaiinfo::activate_collection():\n"; 331 print STDERR "@@@@@ No tmpdb at $self->{'oaidb_tmp_filepath'}\n"; 332 print STDERR "@@@@@ to make 'live' by moving to $self->{'oaidb_live_filepath'}.\n"; 333 } 304 334 } 305 335 } … … 316 346 print STDERR "@@@@@ oaiinfo::mark_all_E_as_PD(): Marking the E entries as PD\n" if $self->{'verbosity'} >= $self->{'verbosity_threshold'}; 317 347 318 348 my $infomap = $self->{'info'}; 319 349 320 350 foreach my $OID (keys %$infomap) { # Mac Mountain Lion wants %$map, won't accept %$self->{'info'} … … 334 364 print STDERR "@@@@@ oaiinfo::mark_all_PD_as_D(): Marking the PD entries as D\n" if $self->{'verbosity'} >= $self->{'verbosity_threshold'}; 335 365 336 366 my $infomap = $self->{'info'}; 337 367 338 368 foreach my $OID (keys %$infomap) { … … 351 381 my $self = shift (@_); 352 382 353 354 355 
} 383 if ($self->{'verbosity'} < $self->{'verbosity_threshold'}) { 384 return; 385 } 356 386 357 387 print STDERR "###########################################################\n"; 358 388 print STDERR "@@@@@ oaiinfo::print_info(): oaidb in memory contains: \n"; 359 389 360 390 my $infomap = $self->{'info'}; 361 391 362 392 foreach my $OID (keys %$infomap) { … … 369 399 370 400 print STDERR "###########################################################\n"; 401 } 402 403 404 # When a fresh oai-inf.db is created, this method is called to add the db's special 405 # record representing the collection's earliest timestamp. 406 # OID=$OID_EARLIEST_TIMESTAMP, deletion_status=NA for not applicable, and current timestamp/date. 407 # For older oai-inf.db's that don't yet have this record, a record will be added too, 408 # but with the timestamp set to the oldest last modified date for the collection's docs. 409 sub insert_coll_earliest_timestamp { 410 my $self = shift (@_); 411 my ($is_new_db) = @_; 412 413 my $current_time = $self->get_current_time(); 414 my $save_to_db = 0; 415 416 417 print STDERR "@@@@@ oaiinfo::insert_coll_earliest_timestamp(): " if $self->{'verbosity'} >= $self->{'verbosity_threshold'}; 418 419 if($is_new_db) { 420 421 print STDERR "New db. Setting timestamp of oai-inf.db creation.\n" if $self->{'verbosity'} >= $self->{'verbosity_threshold'}; 422 423 $self->set_info($OID_EARLIEST_TIMESTAMP, "NA", $current_time); 424 $save_to_db = 1; 425 } 426 427 else { # oai-inf.db already exists, ensure it has an [$OID_EARLIEST_TIMESTAMP] set 428 429 my $earliesttimestamp_record = $self->{'info'}->{$OID_EARLIEST_TIMESTAMP}; 430 431 if (!defined $earliesttimestamp_record) { 432 # oai-inf.db exists, but doesn't contain an [$OID_EARLIEST_TIMESTAMP] record yet. 
433 # Let's create one for it: 434 # Work out the earliest lastmodified datetime in the collection, by inspecting 435 # the last modified timestamp for each doc in the collection 436 437 my $earliest_timestamp = $current_time; 438 439 my $infomap = $self->{'info'}; # Mac Mountain Lion wants %$map, won't accept %$self->{'info'} 440 foreach my $OID (keys %$infomap) { 441 my $OID_info = $self->{'info'}->{$OID}; 442 my $lastmodified = $OID_info->[INFO_TIMESTAMP_INDEX]; 443 if($lastmodified < $earliest_timestamp) { 444 $earliest_timestamp = $lastmodified; 445 } 446 } 447 448 print STDERR "Collection timestamp not yet set for $OID_EARLIEST_TIMESTAMP. Setting to earliest found: $earliest_timestamp\n" if $self->{'verbosity'} >= $self->{'verbosity_threshold'}; 449 450 $self->set_info($OID_EARLIEST_TIMESTAMP, "NA", $earliest_timestamp); 451 $save_to_db = 1; 452 } else { 453 print STDERR "Collection timestamp was already set\n" if $self->{'verbosity'} >= $self->{'verbosity_threshold'}; 454 } 455 } 456 457 return $save_to_db; 371 458 } 372 459 … … 422 509 423 510 # the following method will set to current time if no timestamp provided, 424 # But by explicit here, the code is easier to follow511 # But by being explicit here, the code is easier to follow 425 512 $self->set_info($OID, "D", $self->get_current_time()); 426 513 … … 540 627 541 628 # write out again. Open file for overwriting, not appending. 542 # Then write out data structure $self->{'info'} that 's been maintaining the data in-memory.629 # Then write out data structure $self->{'info'} that has been maintaining the data in-memory. 
543 630 my $infodb_handle = &dbutil::open_infodb_write_handle($infodbtype, $filename); 544 631 545 632 my $infomap = $self->{'info'}; 546 633 foreach my $oid ( keys %$infomap ) { 547 634 my $OID_info = $self->{'info'}->{$oid}; … … 599 686 my @list = (); 600 687 601 688 my $infomap = $self->{'info'}; 602 689 foreach my $OID (keys %$infomap) { 603 690 my $OID_info = $self->{'info'}->{$OID}; … … 617 704 sub size { 618 705 my $self = shift (@_); 619 706 my $infomap = $self->{'info'}; 620 707 return (scalar keys %$infomap); 621 708 }
Note: See TracChangeset for help on using the changeset viewer.