root/main/trunk/greenstone2/perllib/oaiinfo.pm @ 31191

Revision 31191, 25.7 KB (checked in by ak19, 3 years ago)

Correction to previous commit.

Line 
1# This class based on arcinfo.pm
2package oaiinfo;
3
4use constant INFO_STATUS_INDEX  => 0;
5use constant INFO_TIMESTAMP_INDEX => 1;
6
7use constant NO_PD_STEP => 0; # no needing to mark db with "provisionally deleted", no PD pass
8use constant DO_PD_STEP => 1; # PD pass required (pass to mark db "PD" and then later undo it).
9# or SPECIAL_PROCESSING_REQUIRED => 1???
10
11use strict;
12
13use arcinfo;
14use dbutil;
15
16# QUESTIONS:
17# Should we use time or localtime(time) for timestamp? Just timestamp.
18# What format should the timestamp be in, or is the basic format used by perl sufficient? Basic.
19
20# File format read in (text .inf variant): OID <tab> Deletion-Status <tab> Date-timestamp
21
22# Deletion status can be:
23#  E = Doc with OID exists (has not been deleted from collection). Timestamp indicates last time of build
24#  D = Doc with OID has been deleted. Timestamp indicates time of deletion
25#  PD = Provisionally Deleted. Timestamp momentarily unaltered.
26
27# oaidb is "always incremental": always reflects the I/B/R/D status of archive info db,
28# before the indexing step of the build phase that alters the I/B/R/D contents of archive info db.
29# (I=index, B=been indexed, R=reindex; D=delete)
30
# Constructor.
#   $config_filename - path to the collection's config file; its parent folder
#                      (the etc dir) is where the oai-inf databases live.
#   $infodbtype      - optional infodb type; the dbutil default is used when
#                      it is not supplied. "gdbm-txtgz" is treated as "gdbm".
# The new object starts out pointing at the tmp db ('oaidb_file_path') and
# holds an empty in-memory 'info' map of OID => [deletion-status, timestamp].
sub new {
    my ($class, $config_filename, $infodbtype) = @_;

    $infodbtype = &dbutil::get_default_infodb_type() unless defined $infodbtype;
    if ($infodbtype eq "gdbm-txtgz") {
	$infodbtype = "gdbm";
    }

    # Work out the names of both db files (live and tmp) without creating them.
    my $etc_dir = &util::get_parent_folder($config_filename);
    my $perform_firsttime_init = 0;
    my $live_db = &dbutil::get_infodb_file_path($infodbtype, "oai-inf", $etc_dir, $perform_firsttime_init);
    my $tmp_db = &dbutil::get_infodb_file_path($infodbtype, "oai-inf-tmp", $etc_dir, $perform_firsttime_init);

    my $self = {
	'info' => {},                     # map of {OID, array[deletion-status,timestamp]} pairs
	'infodbtype' => $infodbtype,
	'oaidb_live_filepath' => $live_db,
	'oaidb_tmp_filepath' => $tmp_db,
	'etc_dir' => $etc_dir,
	'oaidb_file_path' => $tmp_db      # db file we're working with
    };

    return bless $self, $class;
}
59
# Prepares the starting contents of the tmp-db (temporary oai db) and returns
# the resulting PD-step mode (DO_PD_STEP or NO_PD_STEP).
#
#   $removeold     - true for a proper full rebuild; false for keepold/incremental
#   $have_manifest - true if a manifest file is in use, in which case we behave
#                    as fully incremental (removeold is forced off)
#
# The tmp-db can start off empty, as a copy of the live-db, or as whatever the
# existing tmp-db already contains. Which one depends on three inputs:
#   LDB = live-db exists (only possible once the collection has been activated)
#   TDB = tmp-db exists
#   RO  = removeold
#
# ---------------------------------------
# LDB TDB  RO | Outcome
# ---------------------------------------
#  0   0   0  | clean-slate
#  0   0   1  | clean-slate
#  0   1   0  | top-up-tmpdb
#  0   1   1  | erase tmpdb, clean-slate
#  1   0   0  | copy livedb to tmpdb
#  1   0   1  | copy livedb to tmpdb
#  1   1   0  | top-up-tmpdb
#  1   1   1  | copy livedb to tmpdb
# ---------------------------------------
#
# Which reduces (Karnaugh maps, courtesy of Dr Bainbridge) to:
#   clean-slate          = !LDB && (RO || !TDB)
#   copy-livedb-to-tmpdb =  LDB && (RO || !TDB)
#   top-up-tmpdb         = !RO && TDB
# i.e. the tmp-db is replaced whenever (RO || !TDB), and what it is replaced
# with depends solely on whether a live-db exists.
sub init_tmpdb {
    my ($self, $removeold, $have_manifest) = @_;

    # With a manifest file we are never doing a removeold/full-rebuild.
    $removeold = 0 if $have_manifest;

    # A full rebuild requires the pass that marks all E entries as PD.
    $self->{'removeold_no_manifest'} = $removeold ? 1 : 0;
    $self->{'do_pd_step'} = $removeold ? DO_PD_STEP : NO_PD_STEP;

    my $infodbtype = $self->{'infodbtype'};
    my $live_path = $self->{'oaidb_live_filepath'};
    my $tmp_path = $self->{'oaidb_tmp_filepath'};
    my $have_livedb = &FileUtils::fileExists($live_path);
    my $have_tmpdb = &FileUtils::fileExists($tmp_path);

    my $firsttime_init = 0;

    if (!$have_tmpdb || $removeold) {
	# The present tmp-db (if any) is not to be kept:
	# remove the db file and any associated files.
	&dbutil::remove_db_file($infodbtype, $tmp_path) if $have_tmpdb;

	if ($have_livedb) {
	    # Live-db's contents become the starting contents of the tmp-db.
	    # The tmp-db then exists, so no first-time init is needed.
	    &FileUtils::copyFiles($live_path, $tmp_path);
	}
	else {
	    # Clean slate: either the very first import (no oai db at all), or a
	    # full re-import of a collection that was never activated. Either
	    # way there is neither a live-db nor a tmp-db at this point.
	    $firsttime_init = 1; # new tmpdb

	    # Creating the oai db for the first time is like incremental/"keepold"
	    # (only add, don't reprocess existing), so there is no need for the
	    # special passes that deal with provisional deletes.
	    $self->{'do_pd_step'} = NO_PD_STEP;
	}
    }
    # else: incremental with a tmp-db already present. We keep topping up that
    # tmp-db regardless of any live-db, as it holds the changes from earlier
    # imports/builds that have not been activated yet.

    # The final param follows jmt's $perform_firsttime_init in inexport.pm
    $self->{'oaidb_file_path'} = &dbutil::get_infodb_file_path($infodbtype, "oai-inf-tmp", $self->{'etc_dir'}, $firsttime_init);

    return $self->{'do_pd_step'};
}
199
# Sets the PD-step mode ('do_pd_step') to $mode,
# which is expected to be DO_PD_STEP or NO_PD_STEP.
sub set_proc_mode {
    my ($self, $mode) = @_;

    $self->{'do_pd_step'} = $mode;
}
206
# Returns the stored PD-step mode:
#  1 when the PD step (marking entries as provisionally deleted) is to be done,
#  0 when it is not, which is the case when we're purely incremental
#    or building for the first time.
sub get_proc_mode {
    my ($self) = @_;

    return $self->{'do_pd_step'};
}
213
# Returns the path of the oai db file currently being worked with.
sub get_filepath {
    my ($self) = @_;

    return $self->{'oaidb_file_path'};
}
218
# To be run at the import stage: sets up the tmp-db, loads it into memory and,
# when a PD pass is required, marks every existing (E) entry as provisionally
# deleted (PD) and writes the result back to the db file.
sub import_stage {
    my ($self, $removeold, $have_manifest) = @_;

    # $pd_step_needed is 1 if we're doing a full rebuild and it's NOT the first
    # time the oai-inf db is created: only then do the tasks to do with PD
    # (provisionally deleted) OAI OIDs need to be carried out.
    my $pd_step_needed = $self->init_tmpdb($removeold, $have_manifest);

    $self->load_info();
    $self->print_info(); # DEBUGGING

    if ($pd_step_needed) {
	$self->mark_all_existing_as_provisionallydeleted();
	$self->print_info(); # DEBUGGING

	# done with the in-memory changes, so write them out to the db file
	$self->save_info();
    }
}
242
# To be run at the build stage, before indexing alters the I/B/R/D statuses in
# the archive info db. Brings the oai db in line with the archive info db:
#   I (index)        => index():        add as E, or flip an existing entry back to E
#   R (reindex)      => reindex():      mark as E with the current timestamp
#   D (delete)       => delete():       mark as D with the current timestamp
#   B (been indexed) => been_indexed(): same treatment as I
# Any entry still marked PD afterwards is no longer in the collection and is
# turned into D. The result is then saved to the oai db file.
#
#   $archivedir - the archives directory containing the archiveinf-doc db
#
# (Declared without a prototype: this is a method taking arguments, and
# prototypes are ignored on method calls anyway.)
sub building_stage_before_indexing {
    my $self = shift (@_);
    my ($archivedir) = @_;

    # load archive info db into memory
    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($self->{'infodbtype'}, "archiveinf-doc", $archivedir);
    # NOTE(review): the src filename is computed but not used below; the call is
    # retained in case dbutil::get_infodb_file_path has side effects - confirm.
    my $arcinfo_src_filename = &dbutil::get_infodb_file_path($self->{'infodbtype'}, "archiveinf-src", $archivedir);
    my $archive_info = arcinfo->new($self->{'infodbtype'});
    $archive_info->load_info ($arcinfo_doc_filename);

    # load the oaidb file's contents into memory.
    $self->load_info();
    $self->print_info(); # DEBUGGING

    # process all the index, reindex and delete operations as indicated in arcinfo,
    # all the while ensuring all PDs are changed back to E for OIDs that exist in both
    # arcinfo and oaiinfo db.
    # Explicit %{...} dereference: "keys" applied directly to a hash reference
    # is a compile-time error from perl 5.24 onwards.
    foreach my $OID (keys %{$archive_info->{'info'}}) {
	my $arcinf_tuple = $archive_info->{'info'}->{$OID};
	# use packageName::constant to refer to constants declared in another package,
	# see http://perldoc.perl.org/constant.html
	my $indexing_status = $arcinf_tuple->[arcinfo::INFO_STATUS_INDEX];

	print STDERR "######## OID: $OID - status: $indexing_status\n";

	if($indexing_status eq "I") {
	    $self->index($OID); # add new as E with current timestamp/or set existing as E with orig timestamp
	} elsif($indexing_status eq "R") {
	    $self->reindex($OID); # update timestamp and ensure marked as E (if oid doesn't exist, add new)
	} elsif($indexing_status eq "D") {
	    $self->delete($OID); # set as D with current timestamp
	} elsif($indexing_status eq "B") { # B for "been indexed"
	    # will flip any PD to E if oid exists, else will add a new entry for oid.
	    # A new entry may be required if the collection had been built prior to
	    # turning this into an oaicollection.
	    $self->been_indexed($OID);
	} else {
	    print STDERR "### oaiinfo::building_stage_before_indexing(): Unrecognised indexing status $indexing_status\n";
	}
    }

    # once all docs are processed, go through the oaiinfo db changing any PDs to D
    # along with the current timestamp, to indicate that they're deleted
    $self->mark_all_provisionallydeleted_as_deleted();
    $self->print_info();

    # let's save to db file now that we're done
    $self->save_info();
}
295
# Makes the tmp db the live db: any existing live db is first renamed to
# <live db path>.bak, then the tmp db is renamed to the live db's name.
# When there is no tmp db, nothing is moved and a message is printed to STDERR.
sub activate_collection {
    my ($self) = @_;

    my $live_path = $self->{'oaidb_live_filepath'};
    my $tmp_path = $self->{'oaidb_tmp_filepath'};

    my $have_livedb = &FileUtils::fileExists($live_path);
    my $have_tmpdb = &FileUtils::fileExists($tmp_path);

    if (!$have_tmpdb) {
	print STDERR "@@@@@ In oaiinfo::activate_collection():\n";
	print STDERR "@@@@@   No tmpdb at $self->{'oaidb_tmp_filepath'}\n";
	print STDERR "@@@@@   to make 'live' by moving to $self->{'oaidb_live_filepath'}.\n";
    }
    else {
	# keep the outgoing live db around as a backup:
	# renames the db file and any associated files
	&dbutil::rename_db_file_to($self->{'infodbtype'}, $live_path, $live_path.".bak") if $have_livedb;

	# the tmp db (and any associated files) now becomes the live db
	&dbutil::rename_db_file_to($self->{'infodbtype'}, $tmp_path, $live_path);

	print STDERR "#### Should now have MOVED $self->{'oaidb_tmp_filepath'} to $self->{'oaidb_live_filepath'}\n";
    }
}
321
322############################### FOR FULL AND INCR BUILDING ##########################
# Handles an OID that is either new to the oai db or already in it:
#  - OID not present: added with status=E and the current timestamp (this is
#    what ends up happening on a first build, as no OID exists yet then);
#  - OID present, no PD step (incremental): the timestamp is updated to the
#    current time to reflect the reindexing;
#  - OID present, PD step (full build): the status is set (back) to E and the
#    existing timestamp is kept.
sub Xprocess_new_or_existing {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
	# new OID: exists from the current time on
	$self->set_info($OID, "E", $self->get_current_time());
    }
    elsif ($self->{'do_pd_step'} != NO_PD_STEP) { # mode is DO_PD_STEP
	# Flips any PD status of an existing document back to E, leaving its
	# timestamp alone. Not called on deleted docs, so status was never D.
	$entry->[INFO_STATUS_INDEX] = "E";
    }
    else {
	# update modification time for reindexed doc
	$entry->[INFO_TIMESTAMP_INDEX] = $self->get_current_time();
    }
}
349
350##################### SPECIFIC TO PD-STEP ####################
351
352
# Marks all existing, E (non-deleted) OIDs as Provisionally Deleted (PD).
# Only the in-memory data is changed: nothing is written to the oai-inf db
# here, the caller should call save_info when they want to save to the db.
sub mark_all_existing_as_provisionallydeleted {
    my $self = shift (@_);

    print STDERR "@@@@@ oaiinfo::mark_all_E_as_PD(): Marking the E entries as PD\n";

    # Explicit %{...} dereference: "keys" applied directly to a hash reference
    # is a compile-time error from perl 5.24 onwards.
    foreach my $OID (keys %{$self->{'info'}}) {
	my $curr_status = $self->get_status_info($OID);
	if(defined $curr_status && $curr_status eq "E") {
	    $self->set_status_info($OID, "PD");
	}
    }
}
368
# Marks all OIDs that are Provisionally Deleted (PD) as deleted (D), and sets
# their timestamp to the current time. To be called at the end of a build.
# Only the in-memory data is changed: the caller should save to the db by
# calling save_info.
sub mark_all_provisionallydeleted_as_deleted {
    my $self = shift (@_);

    print STDERR "@@@@@ oaiinfo::mark_all_PD_as_D(): Marking the PD entries as D\n";

    # Explicit %{...} dereference: "keys" applied directly to a hash reference
    # is a compile-time error from perl 5.24 onwards.
    foreach my $OID (keys %{$self->{'info'}}) {
	my $curr_status = $self->get_status_info($OID);
	if(defined $curr_status && $curr_status eq "PD") {
	    $self->set_info($OID, "D", $self->get_current_time());
	}
    }
}
383
# Sets the status of $OID to E (existing), provided the OID is in the oai db.
# Delegates to set_status_info, which only sets the status of a known OID.
sub Xset_status_to_existing_if_OID_present {
    my ($self, $OID) = @_;

    $self->set_status_info($OID, "E");
}
391
392##################### GENERAL, NOT SPECIFIC TO PD-STEP ####################
393
# Debugging aid: prints the in-memory oai db to STDERR,
# one line per OID giving its status and timestamp.
sub print_info {
    my $self = shift (@_);

    print STDERR "###########################################################\n";
    print STDERR "@@@@@ oaiinfo::print_info(): oaidb in memory contains: \n";

    # Explicit %{...} dereference: "keys" applied directly to a hash reference
    # is a compile-time error from perl 5.24 onwards.
    foreach my $OID (keys %{$self->{'info'}}) {
	print STDERR "OID: $OID";
	print STDERR " status: " . $self->{'info'}->{$OID}->[INFO_STATUS_INDEX];
	print STDERR " time: " . $self->{'info'}->{$OID}->[INFO_TIMESTAMP_INDEX];
	print STDERR "\n";
    }

    print STDERR "###########################################################\n";
}
409
410
# Called for a document that is to be indexed.
# If the OID is new to the oai db, it is added with status=E and the current
# timestamp. If the OID is already present, its status is set (back) to E and
# its timestamp is left alone.
sub index {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
	# not present yet: now added as existing from the current time on
	$self->set_info($OID, "E", $self->get_current_time());
    }
    else {
	# already present: status goes back to E, timestamp unchanged
	$entry->[INFO_STATUS_INDEX] = "E";
    }
}
426
# Upon reindexing a document with identifier OID, marks it as E with the
# current time as its timestamp. The single set_info call takes care of 3 things:
#  - if the OID exists, updates the modified time to indicate the doc has been reindexed;
#  - if the OID exists, ensures any status=PD is flipped back to E (as we know the doc exists);
#  - if the OID doesn't exist yet (unlikely), adds a new entry with status=E and current timestamp.
sub reindex {
    my $self = shift (@_);
    my ($OID) = @_;

    # (The former lookup of the existing entry was never used and has been dropped.)
    $self->set_info($OID, "E", $self->get_current_time());
}
440
441
# Called for a document whose archive-info status is B ("been indexed").
# Treated exactly like index(): an OID that was indexed in an earlier build can
# still be new to the oaidb. GS2 collections are not OAI collections by default
# (unlike GS3 collections), so a GS2 collection may have been rebuilt several
# times before OAI support got switched on, in which case its doc OIDs may not
# be in the oaidb yet. Unless we decide (as is the present case) to always
# maintain an oaidb, regardless of whether OAI support is turned on or not.
sub been_indexed {
    my ($self, $OID) = @_;

    return $self->index($OID);
}
454
# Upon deleting a document with identifier OID, sets its status to D (deleted)
# and its timestamp to the current time.
sub delete {
    my ($self, $OID) = @_;

    # set_info would fall back on the current time by itself when given no
    # timestamp, but passing it explicitly makes the code easier to follow
    my $now = $self->get_current_time();
    $self->set_info($OID, "D", $now);
}
466
467#############################################################
# Returns the timestamp to record in the oai db: perl's basic time() value.
# (localtime is deliberately not used: it produces a list of date/time
# components, or a date string in scalar context, rather than a plain timestamp.)
sub get_current_time {
    my ($self) = @_;

    return time();
}
476
# Loads oai info from a tab-separated text file into memory. Each line holds
#    OID <tab> deletion-status [<tab> timestamp]
# and is handed to add_info. Lines with fewer than two fields are skipped.
# Does nothing if $filename is undefined or the file doesn't exist.
# Dies if the file exists but can't be opened for reading.
sub _load_info_txt
{
    my $self = shift (@_);
    my ($filename) = @_;

    return unless (defined $filename && &FileUtils::fileExists($filename));

    # Three-argument open on a lexical handle: the filename can't be
    # misinterpreted as an open mode, and no global bareword handle is used.
    open (my $in_fh, '<', $filename)
	or die "oaiinfo::load_info couldn't read $filename: $!\n";

    while (defined (my $line = <$in_fh>)) {
	$line =~ s/\cM|\cJ//g; # remove end-of-line characters
	my @lineparts = split ("\t", $line);
	if (scalar(@lineparts) >= 2) {
	    $self->add_info (@lineparts);
	}
    }
    close ($in_fh);
}
499
# Loads oai info into memory from the infodb file $filename, read by way of
# dbutil::read_infodb_file. Each record's value is searched for a
# "<status>..." line and a "<timestamp>..." line (the format written out by
# _save_info_db), and the OID is then added with those values through add_info.
sub _load_info_db
{
    my $self = shift (@_);
    my ($filename) = @_;

    my $infodb_map = {};

    &dbutil::read_infodb_file($self->{'infodbtype'}, $filename, $infodb_map);

    # Explicit %{...} dereference: "keys" applied directly to a hash reference
    # is a compile-time error from perl 5.24 onwards.
    foreach my $oid ( keys %{$infodb_map} ) {
	my $vals = $infodb_map->{$oid};
	# interested in oid, timestamp, deletion status

	my ($deletion_status) = ($vals=~/^<status>(.*)$/m);
	my ($timestamp) = ($vals=~/^<timestamp>(.*)$/m);

	$self->add_info ($oid, $deletion_status, $timestamp);
    }
}
519
# (Re)loads the oai info into memory, discarding whatever was in memory before.
# If no filename is passed in (and you don't generally want to pass one), the
# db file we're currently working with ('oaidb_file_path') is loaded.
# A file ending in .inf is read as text, anything else as an infodb file.
# If the file doesn't exist, the in-memory info is simply left empty.
sub load_info {
    my ($self, $filename) = @_;

    $self->{'info'} = {};

    if (!defined $filename) {
	$filename = $self->{'oaidb_file_path'};
    }

    if (&FileUtils::fileExists($filename)) {
	if ($filename !~ m/\.inf$/) {
	    $self->_load_info_db($filename);
	}
	else {
	    $self->_load_info_txt($filename);
	}
    }
}
540
# Writes the in-memory oai info out to the text file $filename (overwriting
# it), one line per OID:
#    OID <tab> deletion-status <tab> timestamp
# Dies if the file can't be opened for writing, or if closing it fails
# (buffered write errors only surface when the handle is closed).
sub _save_info_txt {
    my $self = shift (@_);
    my ($filename) = @_;

    # Three-argument open on a lexical handle: the filename can't be
    # misinterpreted as an open mode, and no global bareword handle is used.
    open (my $out_fh, '>', $filename)
	or die "oaiinfo::save_info couldn't write $filename: $!\n";

    foreach my $info (@{$self->get_OID_list()}) {
	if (defined $info) {
	    print $out_fh join("\t", @$info), "\n";
	}
    }

    close ($out_fh)
	or die "oaiinfo::save_info couldn't finish writing $filename: $!\n";
}
557
# Writes the in-memory oai info out to an infodb file.
# If no filename is passed in (and you don't generally want to pass one), the
# db file we're currently working with ('oaidb_file_path') is written to.
# Each OID becomes one raw db entry with the value
#    <status>STATUS\n<timestamp>TIMESTAMP\n
sub _save_info_db {
    my $self = shift (@_);
    my ($filename) = @_;

    $filename = $self->{'oaidb_file_path'} unless defined $filename;
    my $infodbtype = $self->{'infodbtype'};

    # Write out the data structure $self->{'info'} that's been maintaining the
    # data in-memory. The intention is to overwrite, not append.
    my $infodb_handle = &dbutil::open_infodb_write_handle($infodbtype, $filename);

    # Explicit %{...} dereference: "keys" applied directly to a hash reference
    # is a compile-time error from perl 5.24 onwards.
    foreach my $oid ( keys %{$self->{'info'}} ) {
	my $OID_info = $self->{'info'}->{$oid};

	my $val = "<status>".$OID_info->[INFO_STATUS_INDEX]."\n<timestamp>".$OID_info->[INFO_TIMESTAMP_INDEX]."\n";
	&dbutil::write_infodb_rawentry($infodbtype,$infodb_handle,$oid,$val);
    }
    &dbutil::close_infodb_write_handle($infodbtype, $infodb_handle);
}
584
# Saves the in-memory oai info to file.
# With no filename, the info is written to the db file we're working with.
# With a filename, the text format is used if the name matches the pattern
# below, and the infodb format otherwise.
sub save_info {
    my ($self, $filename) = @_;

    # NOTE(review): as written, the pattern matches "contents" anywhere in the
    # filename OR ".inf" at its end (the $ binds to the second alternative
    # only) - confirm that this is what's intended.
    if (defined $filename && $filename =~ m/(contents)|(\.inf)$/) {
	$self->_save_info_txt($filename);
    }
    else {
	# $filename may be undefined here, in which case _save_info_db
	# falls back on the db file we're working with
	$self->_save_info_db($filename);
    }
}
600
# Entries are never removed from the oai db (a deleted doc is kept on with
# status D instead), so this only prints an error message to STDERR and
# leaves the in-memory info untouched.
sub delete_info {
    my ($self, $OID) = @_;

    print STDERR "@@@@ ERROR oaiinfo::delete_info: Not allowed to delete entries in oai DB.\n";
}
611
# Sets the in-memory entry for $OID to [$del_status, $timestamp], replacing
# any existing entry or adding a new one.
# If no timestamp is passed in, the current time is used.
sub set_info {
    my ($self, $OID, $del_status, $timestamp) = @_;

    $timestamp = $self->get_current_time() unless defined $timestamp;

    $self->{'info'}->{$OID} = [$del_status, $timestamp];
}
621
# Adds an entry for $OID if and only if there isn't one already.
# An existing entry is left untouched and an error is printed to STDERR.
sub add_info {
    my ($self, $OID, $del_status, $timestamp) = @_;

    if (!defined($self->{'info'}->{$OID})) {
	$self->set_info($OID, $del_status, $timestamp);
    }
    else {
	print STDERR "@@@@ ERROR oaiinfo::add_info: id $OID already exists. Not adding.\n";
    }
}
632
# Sets the deletion status of an OID that's already in the in-memory info.
# For an unknown OID nothing is set and a message is printed to STDERR.
sub set_status_info {
    my ($self, $OID, $del_status) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
	print STDERR "oaiinfo::set_status_info: Unable to find OAI document id $OID\n";
    }
    else {
	$entry->[INFO_STATUS_INDEX] = $del_status;
    }
}
645
# Returns the deletion status stored for $OID.
# For an unknown OID, prints a message to STDERR and returns undef.
sub get_status_info {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    unless (defined $entry) {
	print STDERR "oaiinfo::get_status_info: Unable to find OAI document id $OID\n";
	return undef;
    }

    return $entry->[INFO_STATUS_INDEX];
}
662
# Sets the timestamp of an OID that's already in the in-memory info.
# If no timestamp is provided, the current time is used.
# For an unknown OID nothing is set and a message is printed to STDERR.
sub set_timestamp_info {
    my ($self, $OID, $timestamp) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
	print STDERR "oaiinfo::set_timestamp_info: Unable to find OAI document id $OID\n";
    }
    else {
	$timestamp = $self->get_current_time() unless defined $timestamp;
	$entry->[INFO_TIMESTAMP_INDEX] = $timestamp;
    }
}
679
# Returns the timestamp stored for $OID.
# For an unknown OID, prints a message to STDERR and returns undef.
sub get_timestamp_info {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    unless (defined $entry) {
	print STDERR "oaiinfo::get_timestamp_info: Unable to find OAI document id $OID\n";
	return undef;
    }

    return $entry->[INFO_TIMESTAMP_INDEX];
}
696
697
# Returns a reference to a list of the form
#    [[OID, deletion_status, timestamp], ...]
# with one tuple per OID held in memory, in no particular order.
sub get_OID_list
{
    my $self = shift (@_);

    my @list = ();

    # Explicit %{...} dereference: "keys" applied directly to a hash reference
    # is a compile-time error from perl 5.24 onwards.
    foreach my $OID (keys %{$self->{'info'}}) {
	my $OID_info = $self->{'info'}->{$OID};

	push (@list, [$OID, $OID_info->[INFO_STATUS_INDEX],
		      $OID_info->[INFO_TIMESTAMP_INDEX]]);
    }

    return \@list;
}
714
715
716
# Returns the in-memory entry for $OID, which is (a reference to) an array of
# the form [deletion_status, timestamp]. Returns undef for an unknown OID.
sub get_info {
    my ($self, $OID) = @_;

    return (defined $self->{'info'}->{$OID}) ? $self->{'info'}->{$OID} : undef;
}
728
729
730
# Returns the number of entries held in memory so far, including deleted ones.
# http://stackoverflow.com/questions/1109095/how-can-i-find-the-number-of-keys-in-a-hash-in-perl
sub size {
    my $self = shift (@_);

    # Explicit %{...} dereference: "keys" applied directly to a hash reference
    # is a compile-time error from perl 5.24 onwards.
    return (scalar keys %{$self->{'info'}});
}
737
7381;
Note: See TracBrowser for help on using the browser.