Changeset 31192 for main/trunk/greenstone2/perllib/oaiinfo.pm
- Timestamp:
- 2016-12-09T22:29:13+13:00 (7 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/oaiinfo.pm
r31191 r31192 4 4 use constant INFO_STATUS_INDEX => 0; 5 5 use constant INFO_TIMESTAMP_INDEX => 1; 6 7 use constant NO_PD_STEP => 0; # no needing to mark db with "provisionally deleted", no PD pass8 use constant DO_PD_STEP => 1; # PD pass required (pass to mark db "PD" and then later undo it).9 # or SPECIAL_PROCESSING_REQUIRED => 1???10 6 11 7 use strict; … … 71 67 } 72 68 73 if($removeold) { 74 $self->{'removeold_no_manifest'} = 1; 75 $self->{'do_pd_step'} = DO_PD_STEP; # step where all E will be marked as PD 76 } else { 77 $self->{'removeold_no_manifest'} = 0; 78 $self->{'do_pd_step'} = NO_PD_STEP; 79 } 69 my $do_pd_step = ($removeold) ? 1 : 0; 70 # if $removeold, then proper full rebuild, will carry out step where all E will be marked as PD 71 # else some kind of incremental build, won't do the extra PD pass 72 # which is the step marking existing OIDs (E) as PD (provisionally deleted) 80 73 81 74 my $oaidb_live_filepath = $self->{'oaidb_live_filepath'}; … … 85 78 my $livedb_exists = &FileUtils::fileExists($oaidb_live_filepath); 86 79 my $tmpdb_exists = &FileUtils::fileExists($oaidb_tmp_filepath); 87 88 # print STDERR "############ LIVE DB: $self->{'oaidb_live_filepath'}\n";89 # print STDERR "############ TMP DB: $self->{'oaidb_tmp_filepath'}\n";90 80 91 81 my $initdb = 0; … … 143 133 144 134 if($work_with_empty_tmpdb) { # we'll use an empty tmpdb 145 135 146 136 # If importing the collection for the very first time, neither db exists, 147 137 # so create an empty tmpdb. … … 161 151 # "keepold" (keepold means "only add, don't reprocess existing"). So 162 152 # no need to do the special passes dealing with "provisional deletes". 163 $ self->{'do_pd_step'} = NO_PD_STEP;153 $do_pd_step = 0; 164 154 165 155 } elsif ($make_contents_of_tmpdb_that_of_livedb) { … … 195 185 # print STDERR "@@@@@ oaidb: $self->{'oaidb_file_path'}\n"; 196 186 197 return $self->{'do_pd_step'}; 198 } 199 200 sub set_proc_mode { 201 my $self = shift (@_); 202 my ($mode) = @_; 203 204 $self->{'do_pd_step'} = $mode; 205 } 206 207 # returns 1 if doing PD step (marking entries as provisionally deleted) 208 # returns 0 if not doing PD step, which happens when we're purely incremental or building first time. 209 sub get_proc_mode { 210 my $self = shift (@_); 211 return $self->{'do_pd_step'}; 187 return $do_pd_step; 212 188 } 213 189 … … 221 197 my ($removeold, $have_manifest) = @_; 222 198 223 224 199 my $do_pd_step = $self->init_tmpdb($removeold, $have_manifest); 225 # 1 if the step to mark oaidb entries as PD is required 226 # if we're doing full rebuilding and it's NOT the first time creating the oai_inf db, 227 # then the tasks to do with PD (provisionally deleted) OAI OIDs should be carried out 228 229 200 # returns 1 if the step to mark oaidb entries as PD is required 201 # if we're doing full rebuilding and it's NOT the first time creating the oai_inf db, 202 # then the tasks to do with PD (provisionally deleted) OAI OIDs should be carried out 203 230 204 $self->load_info(); 231 205 $self->print_info(); # DEBUGGING … … 320 294 } 321 295 322 ############################### FOR FULL AND INCR BUILDING ##########################323 # add or reindex if incremental (updating timestamp to current),324 # add or (re-)mark PDs as status=E for existing if full build, keeping timestamp.325 # This subroutine will end up adding if firstbuild (since then the OID won't already exist)326 sub Xprocess_new_or_existing {327 my $self = shift (@_);328 my ($OID) = @_;329 330 my $OID_info = $self->{'info'}->{$OID};331 if (defined $OID_info) {332 333 if ($self->{'do_pd_step'} == NO_PD_STEP) { # update modification time for reindexed doc334 $OID_info->[INFO_TIMESTAMP_INDEX] = $self->get_current_time();335 }336 else { # mode is DO_PD_STEP337 $OID_info->[INFO_STATUS_INDEX] = "E"; # will flip any PD status of existing documents back to E,338 # so need to keep existing timestamps.339 # Can't be called on deleted docs, so status was never D340 # $self->set_status_info($OID, "E"); # will only set it if OID exists341 342 343 }344 } else { # if OID is not present, then it's new and now added as existing from current time on345 $self->set_info($OID, "E", $self->get_current_time());346 }347 348 }349 350 296 ##################### SPECIFIC TO PD-STEP #################### 351 297 … … 360 306 361 307 foreach my $OID (keys $self->{'info'}) { 362 my $curr_status = $self->get_status_info($OID); 363 if(defined $curr_status && $curr_status eq "E") { 364 $self->set_status_info($OID, "PD"); 308 my $OID_info = $self->{'info'}->{$OID}; 309 my $curr_status = $OID_info->[INFO_STATUS_INDEX]; 310 if($curr_status eq "E") { 311 $OID_info->[INFO_STATUS_INDEX] = "PD"; 365 312 } 366 313 } … … 375 322 376 323 foreach my $OID (keys $self->{'info'}) { 377 my $curr_status = $self->get_status_info($OID); 378 if(defined $curr_status && $curr_status eq "PD") { 324 my $OID_info = $self->{'info'}->{$OID}; 325 my $curr_status = $OID_info->[INFO_STATUS_INDEX]; 326 if($curr_status eq "PD") { 379 327 $self->set_info($OID, "D", $self->get_current_time()); 380 328 } … … 382 330 } 383 331 384 # find the OID, if it exists, make its status=E for existing.385 sub Xset_status_to_existing_if_OID_present {386 my $self = shift (@_);387 my ($OID) = @_;388 389 $self->set_status_info($OID, "E"); # will only set it if OID exists390 }391 332 392 333 ##################### GENERAL, NOT SPECIFIC TO PD-STEP #################### … … 438 379 439 380 } 440 441 381 442 382 # Does the same as index(): … … 489 429 @lineparts = split ("\t", $line); 490 430 if (scalar(@lineparts) >= 2) { 491 $self-> add_info (@lineparts);431 $self->set_info (@lineparts); 492 432 } 493 433 } 494 434 close (INFILE); 495 435 } 496 497 436 498 437 } … … 514 453 my ($timestamp) = ($vals=~/^<timestamp>(.*)$/m); 515 454 516 $self-> add_info ($oid, $deletion_status, $timestamp);455 $self->set_info ($oid, $deletion_status, $timestamp); 517 456 } 518 457 } … … 599 538 } 600 539 601 sub delete_info {602 my $self = shift (@_);603 my ($OID) = @_;604 605 print STDERR "@@@@ ERROR oaiinfo::delete_info: Not allowed to delete entries in oai DB.\n";606 607 #if (defined $self->{'info'}->{$OID}) {608 # delete $self->{'info'}->{$OID};609 #}610 }611 540 612 541 sub set_info { # sets existing or appends … … 620 549 } 621 550 622 sub add_info { # appends iff it doesn't exist already623 my $self = shift (@_);624 my ($OID, $del_status, $timestamp) = @_;625 626 if (defined($self->{'info'}->{$OID})) {627 print STDERR "@@@@ ERROR oaiinfo::add_info: id $OID already exists. Not adding.\n";628 } else {629 $self->set_info($OID, $del_status, $timestamp);630 }631 }632 633 sub set_status_info {634 my $self = shift (@_);635 my ($OID, $del_status) = @_;636 637 my $OID_info = $self->{'info'}->{$OID};638 if (defined $OID_info) {639 $OID_info->[INFO_STATUS_INDEX] = $del_status;640 }641 else {642 print STDERR "oaiinfo::set_status_info: Unable to find OAI document id $OID\n";643 }644 }645 646 sub get_status_info {647 my $self = shift (@_);648 my ($OID) = @_;649 650 my $del_status = undef;651 652 my $OID_info = $self->{'info'}->{$OID};653 if (defined $OID_info) {654 $del_status = $OID_info->[INFO_STATUS_INDEX];655 }656 else {657 print STDERR "oaiinfo::get_status_info: Unable to find OAI document id $OID\n";658 }659 660 return $del_status;661 }662 663 # if no timestamp provided, uses current timestamp664 sub set_timestamp_info {665 my $self = shift (@_);666 my ($OID, $timestamp) = @_;667 668 my $OID_info = $self->{'info'}->{$OID};669 if (defined $OID_info) {670 if(!defined $timestamp) {671 $timestamp = $self->get_current_time();672 }673 $OID_info->[INFO_TIMESTAMP_INDEX] = $timestamp;674 }675 else {676 print STDERR "oaiinfo::set_timestamp_info: Unable to find OAI document id $OID\n";677 }678 }679 680 sub get_timestamp_info {681 my $self = shift (@_);682 my ($OID) = @_;683 684 my $timestamp = undef;685 686 my $OID_info = $self->{'info'}->{$OID};687 if (defined $OID_info) {688 $timestamp = $OID_info->[INFO_TIMESTAMP_INDEX];689 }690 else {691 print STDERR "oaiinfo::get_timestamp_info: Unable to find OAI document id $OID\n";692 }693 694 return $timestamp;695 }696 697 551 698 552 # returns a list of the form [[OID, timestamp, deletion_status], ...] … … 714 568 715 569 716 717 # returns an array/list of the form [deletion_status, timestamp]718 sub get_info {719 my $self = shift (@_);720 my ($OID) = @_;721 722 if (defined $self->{'info'}->{$OID}) {723 return $self->{'info'}->{$OID};724 }725 726 return undef;727 }728 729 730 731 570 # returns the number of entries so far, including deleted ones 732 571 # http://stackoverflow.com/questions/1109095/how-can-i-find-the-number-of-keys-in-a-hash-in-perl
Note:
See TracChangeset
for help on using the changeset viewer.