root/main/trunk/greenstone2/perllib/oaiinfo.pm @ 31190

Revision 31190, 25.7 KB (checked in by ak19, 3 years ago)

First major commit to do with the new oaiinfo db that keeps track of which oids are deleted. Still need to fix up issue with the new remove and rename subroutines of dbutil's jdbm not being called to clean up *.lg log file associated with main db file. Still need to clean up unused methods in oaiinfo, remove debugging and test against GS3 incr-build-with-manifest tutorial. (Previous 3 commits were commits, not all related.)

Line 
1# This class based on arcinfo.pm
package oaiinfo;

use strict;

# Positions of the two fields inside each per-OID tuple kept in $self->{'info'}.
use constant {
    INFO_STATUS_INDEX    => 0,
    INFO_TIMESTAMP_INDEX => 1,
};

# Values for $self->{'do_pd_step'}.
use constant {
    NO_PD_STEP => 0, # no need to mark db with "provisionally deleted", no PD pass
    DO_PD_STEP => 1, # PD pass required (pass to mark db "PD" and then later undo it).
};
# or SPECIAL_PROCESSING_REQUIRED => 1???

use arcinfo;
use dbutil;
16# QUESTIONS:
17# Should we use time or localtime(time) for timestamp? Just timestamp.
18# What format should the timestamp be in, or is the basic format used by perl sufficient? Basic.
19
# File format read in: OID <tab> Deletion-Status <tab> Date-timestamp
21
22# Deletion status can be:
23#  E = Doc with OID exists (has not been deleted from collection). Timestamp indicates last time of build
24#  D = Doc with OID has been deleted. Timestamp indicates time of deletion
25#  PD = Provisionally Deleted. Timestamp momentarily unaltered.
26
27# oaidb is "always incremental": always reflects the I/B/R/D status of archive info db,
28# before the indexing step of the build phase that alters the I/B/R/D contents of archive info db.
29# (I=index, B=been indexed, R=reindex; D=delete)
30
# Constructor.
#   $config_filename - path of the collection's config file; the oai-inf db files are
#                      looked up in the folder util::get_parent_folder() returns for it
#   $infodbtype      - optional; defaults to dbutil::get_default_infodb_type().
#                      "gdbm-txtgz" is treated as plain "gdbm".
# The object starts out with an empty in-memory map and with 'oaidb_file_path'
# pointing at the tmp db; nothing is loaded until load_info() is called.
sub new {
    my ($class, $config_filename, $infodbtype) = @_;

    $infodbtype = &dbutil::get_default_infodb_type() unless defined $infodbtype;
    if ($infodbtype eq "gdbm-txtgz") {
        $infodbtype = "gdbm";
    }

    # Create the db filenames we'll be working with (tmp and livedb)
    my $etc_dir = &util::get_parent_folder($config_filename);
    my $perform_firsttime_init = 0;
    my $live_path = &dbutil::get_infodb_file_path($infodbtype, "oai-inf", $etc_dir, $perform_firsttime_init);
    my $tmp_path = &dbutil::get_infodb_file_path($infodbtype, "oai-inf-tmp", $etc_dir, $perform_firsttime_init);

    my $self = {
        'info'                => {}, # map of {OID, array[deletion-status,timestamp]} pairs
        'infodbtype'          => $infodbtype,
        'oaidb_live_filepath' => $live_path,
        'oaidb_tmp_filepath'  => $tmp_path,
        'etc_dir'             => $etc_dir,
        'oaidb_file_path'     => $tmp_path, # db file we're working with
    };

    return bless $self, $class;
}
59
# Works out the starting contents of the tmp-db (temporary oai db): whether it should
# start off empty, or with the contents of any existing live-db, or with the contents
# of any existing tmp-db.
#   $removeold     - true for a proper full rebuild; false for keepold/incremental
#   $have_manifest - true if building with a manifest file. This overrides $removeold:
#                    with a manifest we pretend to be fully incremental for the oaiinfo db.
# Sets 'removeold_no_manifest', 'do_pd_step' and 'oaidb_file_path' on $self, and may
# remove the tmp db file or replace it with a copy of the live db.
#
# The outcome depends on the truth table built on the following 3 variables:
#   LDB = LiveDB exists (only possible if the collection has been activated at least once)
#   TDB = TmpDB exists
#   RO  = Removeold (after the manifest override above)
# OUTCOMES:
#   clean slate (create an empty tmpdb/make tmpdb empty)
#   top up tmpDB (work with existing tmpdb)
#   copy LiveDB to TmpDB (liveDB's contents become the contents of TmpDB, and we'll work with that)
#
# TRUTH TABLE:
# ---------------------------------------
# LDB TDB  RO | Outcome
# ---------------------------------------
#  0   0   0  | clean-slate
#  0   0   1  | clean-slate
#  0   1   0  | top-up-tmpdb
#  0   1   1  | erase tmpdb, clean-slate
#  1   0   0  | copy livedb to tmpdb
#  1   0   1  | copy livedb to tmpdb
#  1   1   0  | top-up-tmpdb
#  1   1   1  | copy livedb to tmpdb
# ---------------------------------------
#
# Dr Bainbridge worked out using Karnaugh maps that, from the above truth table:
# => clean-slate/empty-tmpdb = !LDB && (RO || !TDB)
# => top-up-tmpdb/work-with-existing-tmpdb = !RO && TDB
# => copy-livedb-to-tmpdb = LDB && (!TDB || RO)
# The three cases are exhaustive, so no further catch-all is needed.
sub init_tmpdb {
    my ($self, $removeold, $have_manifest) = @_;

    # if we have a manifest file, we're not doing removeold/full-rebuild
    $removeold = 0 if $have_manifest;

    # A full rebuild needs the step where all E will be marked as PD
    # (this may still be switched off below for a clean-slate tmpdb).
    $self->{'removeold_no_manifest'} = $removeold ? 1 : 0;
    $self->{'do_pd_step'} = $removeold ? DO_PD_STEP : NO_PD_STEP;

    my $infodbtype = $self->{'infodbtype'};
    my $live_path = $self->{'oaidb_live_filepath'};
    my $tmp_path = $self->{'oaidb_tmp_filepath'};
    my $livedb_exists = &FileUtils::fileExists($live_path);
    my $tmpdb_exists = &FileUtils::fileExists($tmp_path);

    my $initdb = 0;

    # top-up-tmpdb = TDB && !RO
    my $keep_existing_tmpdb = ($tmpdb_exists && !$removeold);

    if (!$keep_existing_tmpdb) {
        # Either outcome below starts by getting rid of any tmpdb that is still around
        # (the db file and any assoc files).
        &dbutil::remove_db_file($self->{'infodbtype'}, $tmp_path) if $tmpdb_exists;

        if ($livedb_exists) {
            # copy-livedb-to-tmpdb: the livedb exists and either we're doing a full
            # rebuild, or there is no tmpdb to continue from.
            &FileUtils::copyFiles($live_path, $tmp_path);

            $initdb = 0; # tmpdb exists, since we just copied livedb to tmpdb
        }
        else {
            # clean-slate: importing for the very first time (neither db exists), or
            # never activated (no livedb) and now doing a full re-import, in which
            # case the old tmpdb contents could be wiped out.
            $initdb = 1; # new tmpdb

            # if the oai db is created the first time, it's like incremental and
            # "keepold" (keepold means "only add, don't reprocess existing"). So
            # no need to do the special passes dealing with "provisional deletes".
            $self->{'do_pd_step'} = NO_PD_STEP;
        }
    }
    # else: incremental and a tmpdb already exists, regardless of whether livedb exists.
    # We've been importing (perhaps followed by building) repeatedly without activating,
    # so build on top of what's presently in tmpdb to maintain those changes.

    # final param follows jmt's $perform_firsttime_init in inexport.pm
    $self->{'oaidb_file_path'} = &dbutil::get_infodb_file_path($infodbtype, "oai-inf-tmp", $self->{'etc_dir'}, $initdb);
}
198
# Stores $mode (expected to be NO_PD_STEP or DO_PD_STEP) as the object's PD-step flag.
sub set_proc_mode {
    my ($self, $mode) = @_;

    $self->{'do_pd_step'} = $mode;
}
205
# Returns the stored PD-step flag:
#   1 if doing the PD step (marking entries as provisionally deleted),
#   0 if not, which happens when we're purely incremental or building first time.
sub get_proc_mode {
    my ($self) = @_;

    return $self->{'do_pd_step'};
}
212
# Returns the path of the oai db file this object is currently working with.
sub get_filepath {
    my ($self) = @_;

    return $self->{'oaidb_file_path'};
}
217
# Import-phase entry point: prepares the tmpdb, loads it into memory and, when a
# PD pass is needed, marks every existing (E) entry as provisionally deleted (PD)
# and writes the result back to the db file.
#   $removeold, $have_manifest - passed straight on to init_tmpdb()
sub import_stage {
    my ($self, $removeold, $have_manifest) = @_;

    $self->init_tmpdb($removeold, $have_manifest);

    # init_tmpdb() has decided whether the PD step applies: it does if we're doing a
    # full rebuild and it's NOT the first time creating the oai_inf db.
    my $pd_pass_needed = $self->{'do_pd_step'};

    $self->load_info();
    $self->print_info(); # DEBUGGING

    if ($pd_pass_needed) {
        $self->mark_all_existing_as_provisionallydeleted();
        $self->print_info(); # DEBUGGING

        # save to db file now that we're done
        $self->save_info();
    }
}
241
# Build-phase entry point, to run before the indexing step alters the I/B/R/D
# statuses in the archive info db.
#   $archivedir - folder containing the archiveinf-doc / archiveinf-src dbs
# Loads the archive info db and the oai db into memory, applies every archive-info
# status to the oai db (I=index, R=reindex, D=delete, B=been indexed), turns any
# entries still marked PD into D with the current timestamp, then saves the oai db.
sub building_stage_before_indexing {
    my $self = shift (@_);
    my ($archivedir) = @_;

    # load archive info db into memory
    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($self->{'infodbtype'}, "archiveinf-doc", $archivedir);
    # NOTE(review): the src filename is computed but not used below.
    my $arcinfo_src_filename = &dbutil::get_infodb_file_path($self->{'infodbtype'}, "archiveinf-src", $archivedir);
    my $archive_info = arcinfo->new($self->{'infodbtype'});
    $archive_info->load_info ($arcinfo_doc_filename);

    # load the oaidb file's contents into memory.
    $self->load_info();
    $self->print_info(); # DEBUGGING

    # process all the index, reindex and delete operations as indicated in arcinfo,
    # all the while ensuring all PDs are changed back to E for OIDs that exist in both arcinfo and oaiinfo db.

    foreach my $OID (keys %{$archive_info->{'info'}}) {
        my $arcinf_tuple = $archive_info->{'info'}->{$OID};
        # use packageName::constant to refer to constants declared in another package,
        # see http://perldoc.perl.org/constant.html
        my $indexing_status = $arcinf_tuple->[arcinfo::INFO_STATUS_INDEX];

        print STDERR "######## OID: $OID - status: $indexing_status\n";

        if($indexing_status eq "I") {
            $self->index($OID); # add new as E with current timestamp/or set existing as E with orig timestamp
        } elsif($indexing_status eq "R") {
            $self->reindex($OID); # update timestamp and ensure marked as E (if oid doesn't exist, add new)
        } elsif($indexing_status eq "D") {
            $self->delete($OID); # set as D with current timestamp
        } elsif($indexing_status eq "B") { # B for "been indexed"
            $self->been_indexed($OID); # will flip any PD to E if oid exists, else will add new entry for oid
            # A new entry may be required if the collection had been built prior to turning this into
            # an oaicollection. But what if we always maintain an oaidb? Still call $self->index() here.
        } else {
            print STDERR "### oaiinfo::building_stage_before_indexing(): Unrecognised indexing status $indexing_status\n";
        }
    }

    # once all docs processed, go through oaiinfo db changing any PDs to D along with current timestamp
    # to indicate that they're deleted
    $self->mark_all_provisionallydeleted_as_deleted();
    $self->print_info();

    # let's save to db file now that we're done
    $self->save_info();
}
294
# Makes the tmp db the live db: if a tmp db exists, any existing live db is first
# renamed to <live>.bak, then the tmp db is renamed to the live db's name.
# If there is no tmp db, only a diagnostic is printed.
sub activate_collection { # move tmp db to live db
    my ($self) = @_;

    my $live_path = $self->{'oaidb_live_filepath'};
    my $tmp_path = $self->{'oaidb_tmp_filepath'};

    my $livedb_exists = &FileUtils::fileExists($live_path);
    my $tmpdb_exists = &FileUtils::fileExists($tmp_path);

    if (!$tmpdb_exists) {
        print STDERR "@@@@@ In oaiinfo::activate_collection():\n";
        print STDERR "@@@@@   No tmpdb at $self->{'oaidb_tmp_filepath'}\n";
        print STDERR "@@@@@   to make 'live' by moving to $self->{'oaidb_live_filepath'}.\n";
    }
    else {
        # rename_db_file_to renames the db file and any assoc files
        if ($livedb_exists) {
            # keep the previous live db around as a backup
            &dbutil::rename_db_file_to($self->{'infodbtype'}, $live_path, $live_path.".bak");
        }
        &dbutil::rename_db_file_to($self->{'infodbtype'}, $tmp_path, $live_path);

        print STDERR "#### Should now have MOVED $self->{'oaidb_tmp_filepath'} to $self->{'oaidb_live_filepath'}\n";
    }
}
320
321############################### FOR FULL AND INCR BUILDING ##########################
# For an OID not yet in the in-memory map: adds it with status=E and the current time
# (this is what ends up happening on a first build).
# For an OID already present:
#   - when no PD step is being done (incremental), updates its timestamp to the current time;
#   - when the PD step is being done (full build), sets status=E and keeps the timestamp.
sub Xprocess_new_or_existing {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        # new OID: added as existing from current time on
        $self->set_info($OID, "E", $self->get_current_time());
    }
    elsif ($self->{'do_pd_step'} == NO_PD_STEP) {
        # update modification time for reindexed doc
        $entry->[INFO_TIMESTAMP_INDEX] = $self->get_current_time();
    }
    else {
        # mode is DO_PD_STEP: flip any PD status of existing documents back to E,
        # keeping existing timestamps.
        # Can't be called on deleted docs, so status was never D
        $entry->[INFO_STATUS_INDEX] = "E";
    }
}
348
349##################### SPECIFIC TO PD-STEP ####################
350
351
# Marks all existing, E (non-deleted) OIDs as Provisionally Deleted (PD).
# Only the in-memory map is changed: this subroutine doesn't save to the oai-inf db,
# so the caller should call save_info when they want to save to the db.
sub mark_all_existing_as_provisionallydeleted {
    my $self = shift (@_);

    print STDERR "@@@@@ oaiinfo::mark_all_E_as_PD(): Marking the E entries as PD\n";

    # Dereference explicitly: keys on a hash reference is a fatal error from Perl 5.24.
    foreach my $OID (keys %{$self->{'info'}}) {
        my $curr_status = $self->get_status_info($OID);
        if(defined $curr_status && $curr_status eq "E") {
            $self->set_status_info($OID, "PD");
        }
    }
}
367
# Marks all OIDs that are Provisionally Deleted (PD) as deleted (D), and sets their
# timestamp to the current time.
# To be called at end of build. Only the in-memory map is changed: the caller should
# save to the db by calling save_info.
sub mark_all_provisionallydeleted_as_deleted {
    my $self = shift (@_);

    print STDERR "@@@@@ oaiinfo::mark_all_PD_as_D(): Marking the PD entries as D\n";

    # Dereference explicitly: keys on a hash reference is a fatal error from Perl 5.24.
    foreach my $OID (keys %{$self->{'info'}}) {
        my $curr_status = $self->get_status_info($OID);
        if(defined $curr_status && $curr_status eq "PD") {
            $self->set_info($OID, "D", $self->get_current_time());
        }
    }
}
382
# If the OID is present in the in-memory map, makes its status=E for existing.
# set_status_info() only sets the status if the OID exists (it prints a message otherwise).
sub Xset_status_to_existing_if_OID_present {
    my ($self, $OID) = @_;

    $self->set_status_info($OID, "E");
}
390
391##################### GENERAL, NOT SPECIFIC TO PD-STEP ####################
392
# Debugging aid: prints every entry of the in-memory map (OID, status, timestamp)
# to STDERR, one per line, between two banner lines.
sub print_info {
    my $self = shift (@_);

    print STDERR "###########################################################\n";
    print STDERR "@@@@@ oaiinfo::print_info(): oaidb in memory contains: \n";

    # Dereference explicitly: keys on a hash reference is a fatal error from Perl 5.24.
    foreach my $OID (keys %{$self->{'info'}}) {
        print STDERR "OID: $OID";
        print STDERR " status: " . $self->{'info'}->{$OID}->[INFO_STATUS_INDEX];
        print STDERR " time: " . $self->{'info'}->{$OID}->[INFO_TIMESTAMP_INDEX];
        print STDERR "\n";
    }

    print STDERR "###########################################################\n";
}
408
409
# If the OID is already present, makes its status=E for existing and leaves its
# timestamp alone. If the OID doesn't yet exist, adds it as a new entry with
# status=E and the current timestamp.
sub index {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        # not present: it's now added as existing from current time on
        $self->set_info($OID, "E", $self->get_current_time());
    }
    else {
        # present: change status back to E, timestamp unchanged
        $entry->[INFO_STATUS_INDEX] = "E";
    }
}
425
# Upon reindexing a document with identifier OID, sets its status to E and its
# timestamp to the current time. A single set_info() call takes care of 3 things:
#   - if OID exists, updates modified time to indicate the doc has been reindexed;
#   - if OID exists, ensures any status=PD is flipped back to E for this OID doc
#     (as we know it exists);
#   - if the OID doesn't yet exist (unlikely), adds a new OID entry with status=E
#     and current timestamp.
sub reindex {
    my $self = shift (@_);
    my ($OID) = @_;

    $self->set_info($OID, "E", $self->get_current_time());
}
439
440
# Handles an OID whose archive-info status is B ("been indexed"). Does the same as index():
# OIDs that have been indexed upon rebuild may still be new to the oaidb. GS2 collections
# are not OAI collections by default, unlike GS3 collections. Imagine rebuilding a (GS2)
# collection 5 times and then setting it to be an OAI collection: in that case, the doc OIDs
# in the collection may not be in the oaidb yet. Unless we decide (as is the present case)
# to always maintain an oaidb, regardless of whether the collection has OAI support
# turned on or not.
sub been_indexed {
    my ($self, $OID) = @_;

    $self->index($OID);
}
453
# Upon deleting a document with identifier OID,
# sets its status to deleted (D) and changes its timestamp to the current time.
sub delete {
    my ($self, $OID) = @_;

    # set_info() would use the current time anyway if no timestamp were provided,
    # but by being explicit here, the code is easier to follow
    my $now = $self->get_current_time();
    $self->set_info($OID, "D", $now);
}
465
466#############################################################
# Returns the current time as given by perl's time builtin. This is the timestamp
# format used throughout the oai db.
# (localtime was considered instead, see http://perldoc.perl.org/functions/localtime.html,
# but the basic timestamp was judged sufficient.)
sub get_current_time {
    my ($self) = @_;

    my $now = time;
    return $now;
}
475
# Loads entries from a tab-separated text file into the in-memory map.
#   $filename - file to read; nothing happens if it is undefined or doesn't exist
# Each line is stripped of CR/LF characters and split on tabs; lines with at least
# 2 fields are handed to add_info() in file order (add_info takes OID, status, timestamp).
# Dies if the file exists but cannot be opened.
sub _load_info_txt
{
    my $self = shift (@_);
    my ($filename) = @_;

    if (defined $filename && &FileUtils::fileExists($filename)) {
        # 3-arg open with a lexical handle, so that characters in $filename can
        # never be interpreted as an open mode.
        open (my $infile, '<', $filename) ||
            die "oaiinfo::load_info couldn't read $filename\n";

        my ($line, @lineparts);
        while (defined ($line = <$infile>)) {
            $line =~ s/\cM|\cJ//g; # remove end-of-line characters
            @lineparts = split ("\t", $line);
            if (scalar(@lineparts) >= 2) {
                $self->add_info (@lineparts);
            }
        }
        close ($infile);
    }
}
498
# Loads entries from an infodb file into the in-memory map.
#   $filename - db file, read via dbutil::read_infodb_file() using this object's infodbtype
# Each db value is expected to contain "<status>..." and "<timestamp>..." lines
# (the layout _save_info_db writes); both are extracted and passed to add_info().
sub _load_info_db
{
    my $self = shift (@_);
    my ($filename) = @_;

    my $infodb_map = {};

    &dbutil::read_infodb_file($self->{'infodbtype'}, $filename, $infodb_map);

    # Dereference explicitly: keys on a hash reference is a fatal error from Perl 5.24.
    foreach my $oid ( keys %{$infodb_map} ) {
        my $vals = $infodb_map->{$oid};
        # interested in oid, timestamp, deletion status

        # /m so that ^ and $ anchor at each line of the multi-line value
        my ($deletion_status) = ($vals=~/^<status>(.*)$/m);
        my ($timestamp) = ($vals=~/^<timestamp>(.*)$/m);

        $self->add_info ($oid, $deletion_status, $timestamp);
    }
}
518
# Replaces the in-memory map with the contents of an oai db file.
#   $filename - optional (and you don't generally want to pass it); defaults to the
#               db file this object is working with ('oaidb_file_path')
# If the file doesn't exist the map is simply left empty. Files ending in .inf are
# read as text, anything else as an infodb.
sub load_info {
    my ($self, $filename) = @_;

    $self->{'info'} = {};

    if (!defined $filename) {
        $filename = $self->{'oaidb_file_path'};
    }

    if (&FileUtils::fileExists($filename)) {
        my $loader = ($filename =~ m/\.inf$/) ? '_load_info_txt' : '_load_info_db';
        $self->$loader($filename);
    }
}
539
# Writes the in-memory map to a text file, overwriting it.
#   $filename - file to write
# One line is written per entry returned by get_OID_list(): its fields joined by tabs.
# Dies if the file cannot be opened for writing, or if closing it fails.
sub _save_info_txt {
    my $self = shift (@_);
    my ($filename) = @_;

    # 3-arg open with a lexical handle, so that characters in $filename can
    # never be interpreted as part of the open mode.
    open (my $outfile, '>', $filename) ||
        die "oaiinfo::save_info couldn't write $filename\n";

    foreach my $info (@{$self->get_OID_list()}) {
        if (defined $info) {
            print $outfile join("\t", @$info), "\n";
        }
    }

    # buffered write errors only surface at close, so check it
    close ($outfile) ||
        die "oaiinfo::save_info couldn't finish writing $filename: $!\n";
}
556
# Writes the in-memory map out to an infodb file.
#   $filename - optional (and you don't generally want to pass it); defaults to the
#               db file this object is working with ('oaidb_file_path')
# Every entry is written as a raw record keyed by OID, with the value
#   <status>STATUS\n<timestamp>TIMESTAMP\n
# which is the layout _load_info_db parses.
sub _save_info_db {
    my $self = shift (@_);
    my ($filename) = @_;

    $filename = $self->{'oaidb_file_path'} unless defined $filename;
    my $infodbtype = $self->{'infodbtype'};

    # write out again. Open file for overwriting, not appending.
    # Then write out data structure $self->{'info'} that's been maintaining the data in-memory.
    my $infodb_handle = &dbutil::open_infodb_write_handle($infodbtype, $filename);

    # Dereference explicitly: keys on a hash reference is a fatal error from Perl 5.24.
    foreach my $oid ( keys %{$self->{'info'}} ) {
        my $OID_info = $self->{'info'}->{$oid};

        my $val = "<status>".$OID_info->[INFO_STATUS_INDEX]."\n<timestamp>".$OID_info->[INFO_TIMESTAMP_INDEX]."\n";
        &dbutil::write_infodb_rawentry($infodbtype,$infodb_handle,$oid,$val);
    }
    &dbutil::close_infodb_write_handle($infodbtype, $infodb_handle);
}
583
# Saves the in-memory map.
#   $filename - optional. Without it, the map is written to the db file this object is
#               working with. With it, the name decides the format: text when the
#               pattern below matches, infodb otherwise.
# NOTE(review): as written the pattern matches "contents" anywhere in the name, or
# ".inf" at the end of it; the $ anchor applies to the second alternative only.
# Confirm whether "(contents|\.inf)$" was intended.
sub save_info {
    my ($self, $filename) = @_;

    if (!defined $filename) {
        $self->_save_info_db();
    }
    elsif ($filename =~ m/(contents)|(\.inf)$/) {
        $self->_save_info_txt($filename);
    }
    else {
        $self->_save_info_db($filename);
    }
}
599
# Entries are never removed from the oai db, so this only reports an error on STDERR.
# The in-memory map is left untouched. (To record that a document was deleted, see delete().)
#   $OID - accepted but ignored
sub delete_info {
    my ($self, $OID) = @_;

    print STDERR "@@@@ ERROR oaiinfo::delete_info: Not allowed to delete entries in oai DB.\n";
}
610
# Sets the in-memory entry for an OID, overwriting any existing entry or adding a new one.
#   $OID        - document identifier
#   $del_status - deletion status to store
#   $timestamp  - optional; the current time is used when it is not defined
sub set_info { # sets existing or appends
    my ($self, $OID, $del_status, $timestamp) = @_;

    # get current date timestamp if none was given
    $timestamp = $self->get_current_time() unless defined $timestamp;

    $self->{'info'}->{$OID} = [$del_status, $timestamp];
}
620
# Adds an entry for an OID if and only if the OID isn't in the in-memory map yet.
# If it already is, an error is printed to STDERR and the existing entry is kept.
#   $OID, $del_status, $timestamp - as for set_info()
sub add_info { # appends iff it doesn't exist already
    my ($self, $OID, $del_status, $timestamp) = @_;

    my $already_present = defined($self->{'info'}->{$OID});

    if (!$already_present) {
        $self->set_info($OID, $del_status, $timestamp);
    }
    else {
        print STDERR "@@@@ ERROR oaiinfo::add_info: id $OID already exists. Not adding.\n";
    }
}
631
# Changes the deletion status of an OID that is already in the in-memory map,
# leaving its timestamp alone. If the OID is unknown, prints a message to STDERR
# and changes nothing.
sub set_status_info {
    my ($self, $OID, $del_status) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        print STDERR "oaiinfo::set_status_info: Unable to find OAI document id $OID\n";
    }
    else {
        $entry->[INFO_STATUS_INDEX] = $del_status;
    }
}
644
# Returns the deletion status stored for an OID.
# If the OID is not in the in-memory map, prints a message to STDERR and returns undef.
sub get_status_info {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        print STDERR "oaiinfo::get_status_info: Unable to find OAI document id $OID\n";
        return undef;
    }

    return $entry->[INFO_STATUS_INDEX];
}
661
# Changes the timestamp of an OID that is already in the in-memory map, leaving its
# status alone. If no timestamp is provided, the current timestamp is used.
# If the OID is unknown, prints a message to STDERR and changes nothing.
sub set_timestamp_info {
    my ($self, $OID, $timestamp) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        print STDERR "oaiinfo::set_timestamp_info: Unable to find OAI document id $OID\n";
    }
    else {
        $timestamp = $self->get_current_time() if !defined $timestamp;
        $entry->[INFO_TIMESTAMP_INDEX] = $timestamp;
    }
}
678
# Returns the timestamp stored for an OID.
# If the OID is not in the in-memory map, prints a message to STDERR and returns undef.
sub get_timestamp_info {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        print STDERR "oaiinfo::get_timestamp_info: Unable to find OAI document id $OID\n";
        return undef;
    }

    return $entry->[INFO_TIMESTAMP_INDEX];
}
695
696
# Returns a reference to a list of the form [[OID, deletion_status, timestamp], ...],
# with one element per entry of the in-memory map, in no particular order.
# (This field order is what _save_info_txt writes out, tab-separated.)
sub get_OID_list
{
    my $self = shift (@_);

    my @list = ();

    # Dereference explicitly: keys on a hash reference is a fatal error from Perl 5.24.
    foreach my $OID (keys %{$self->{'info'}}) {
        my $OID_info = $self->{'info'}->{$OID};

        push (@list, [$OID, $OID_info->[INFO_STATUS_INDEX],
                      $OID_info->[INFO_TIMESTAMP_INDEX]]);
    }

    return \@list;
}
713
714
715
# Returns the stored entry for an OID: a reference to an array of the form
# [deletion_status, timestamp]. Returns undef if the OID is not in the in-memory map.
sub get_info {
    my ($self, $OID) = @_;

    # a missing OID simply yields undef here
    my $entry = $self->{'info'}->{$OID};

    return $entry;
}
727
728
729
# Returns the number of entries in the in-memory map so far, including deleted ones.
# http://stackoverflow.com/questions/1109095/how-can-i-find-the-number-of-keys-in-a-hash-in-perl
sub size {
    my $self = shift (@_);

    # Dereference explicitly: keys on a hash reference is a fatal error from Perl 5.24.
    return (scalar keys %{$self->{'info'}});
}
736
7371;
Note: See TracBrowser for help on using the browser.