source: main/trunk/greenstone2/perllib/oaiinfo.pm@ 31190

Last change on this file since 31190 was 31190, checked in by ak19, 7 years ago

First major commit to do with the new oaiinfo db that keeps track of which oids are deleted. Still need to fix up issue with the new remove and rename subroutines of dbutil's jdbm not being called to clean up *.lg log file associated with main db file. Still need to clean up unused methods in oaiinfo, remove debugging and test agains GS3 incr-build-with-manifest tutorial. (Previous 3 commits were commits, not all related.)

File size: 25.7 KB
Line 
1# This class based on arcinfo.pm
2package oaiinfo;
3
4use constant INFO_STATUS_INDEX => 0;
5use constant INFO_TIMESTAMP_INDEX => 1;
6
7use constant NO_PD_STEP => 0; # no needing to mark db with "provisionally deleted", no PD pass
8use constant DO_PD_STEP => 1; # PD pass required (pass to mark db "PD" and then later undo it).
9# or SPECIAL_PROCESSING_REQUIRED => 1???
10
11use strict;
12
13use arcinfo;
14use dbutil;
15
16# QUESTIONS:
17# Should we use time or localtime(time) for timestamp? Just timestamp.
18# What format should the timestamp be in, or is the basic format used by perl sufficient? Basic.
19
20# File format read in: OID <tab> Deletion-Status <tab> Date-timestamp
21
22# Deletion status can be:
23# E = Doc with OID exists (has not been deleted from collection). Timestamp indicates last time of build
24# D = Doc with OID has been deleted. Timestamp indicates time of deletion
25# PD = Provisionally Deleted. Timestamp momentarily unaltered.
26
27# oaidb is "always incremental": always reflects the I/B/R/D status of archive info db,
28# before the indexing step of the build phase that alters the I/B/R/D contents of archive info db.
29# (I=index, B=been indexed, R=reindex; D=delete)
30
# Constructor.
#   $config_filename - path to the collection's config file; the oai-inf db files
#                      are located relative to its parent folder
#   $infodbtype      - (optional) info db type; dbutil's default is used if undefined.
#                      "gdbm-txtgz" is treated as plain "gdbm".
# Returns the new object. It starts out pointing at the tmp db: the live db path is
# only remembered, never set as the working db here.
sub new {
    my ($class, $config_filename, $infodbtype) = @_;

    $infodbtype = &dbutil::get_default_infodb_type() unless defined $infodbtype;
    if ($infodbtype eq "gdbm-txtgz") {
        $infodbtype = "gdbm";
    }

    # Work out the db filenames we'll be working with (tmp and livedb)
    my $etc_dir = &util::get_parent_folder($config_filename);
    my $perform_firsttime_init = 0;
    my $live_path = &dbutil::get_infodb_file_path($infodbtype, "oai-inf", $etc_dir, $perform_firsttime_init);
    my $tmp_path = &dbutil::get_infodb_file_path($infodbtype, "oai-inf-tmp", $etc_dir, $perform_firsttime_init);

    my $self = {
        'info'                => {},          # map of {OID, array[deletion-status,timestamp]} pairs
        'infodbtype'          => $infodbtype,
        'oaidb_live_filepath' => $live_path,
        'oaidb_tmp_filepath'  => $tmp_path,
        'etc_dir'             => $etc_dir,
        'oaidb_file_path'     => $tmp_path,   # db file we're working with
    };

    return bless $self, $class;
}
59
# Works out the starting contents of the tmp-db (temporary oai db): whether it starts
# off empty, with the contents of an existing live-db, or with the contents of an
# existing tmp-db. Also sets $self->{'removeold_no_manifest'}, $self->{'do_pd_step'}
# and $self->{'oaidb_file_path'}.
#
#   $removeold     - true for a proper full rebuild; false for keepold/incremental
#   $have_manifest - true if building with a manifest file, which is always treated
#                    as incremental here (overrides $removeold)
#
# The outcome depends on three factors:
#   LDB = LiveDB exists (only possible if the collection was activated before)
#   TDB = TmpDB exists
#   RO  = Removeold
#
# TRUTH TABLE:
# ---------------------------------------
# LDB  TDB  RO | Outcome
# ---------------------------------------
#  0    0    0 | clean-slate
#  0    0    1 | clean-slate
#  0    1    0 | top-up-tmpdb
#  0    1    1 | erase tmpdb, clean-slate
#  1    0    0 | copy livedb to tmpdb
#  1    0    1 | copy livedb to tmpdb
#  1    1    0 | top-up-tmpdb
#  1    1    1 | copy livedb to tmpdb
# ---------------------------------------
#
# which reduces (Karnaugh maps, courtesy of Dr Bainbridge) to:
#   clean-slate/empty-tmpdb    = !LDB && (RO || !TDB)
#   copy-livedb-to-tmpdb       =  LDB && (RO || !TDB)
#   top-up-tmpdb/work-with-it  = !RO && TDB
# The first two share the factor (RO || !TDB), i.e. "the current tmpdb, if any, is
# not wanted", and the third is exactly its negation, so all cases are covered.
sub init_tmpdb {
    my ($self, $removeold, $have_manifest) = @_;

    # With a manifest file we pretend to be fully incremental for the oaiinfo db,
    # so it is never a removeold/full-rebuild.
    $removeold = 0 if $have_manifest;

    # Full rebuild: needs the step where all E entries get marked as PD.
    $self->{'removeold_no_manifest'} = $removeold ? 1 : 0;
    $self->{'do_pd_step'} = $removeold ? DO_PD_STEP : NO_PD_STEP;

    my $live_path  = $self->{'oaidb_live_filepath'};
    my $tmp_path   = $self->{'oaidb_tmp_filepath'};
    my $infodbtype = $self->{'infodbtype'};

    my $livedb_exists = &FileUtils::fileExists($live_path);
    my $tmpdb_exists  = &FileUtils::fileExists($tmp_path);

    my $initdb = 0; # stays 0 whenever we end up with an already-populated tmpdb

    my $tmpdb_not_wanted = (!$tmpdb_exists || $removeold);

    if ($tmpdb_not_wanted) {
        # Either there is no tmpdb, or we're doing a full rebuild and its contents
        # can be wiped out: remove the db file and any assoc files.
        &dbutil::remove_db_file($infodbtype, $tmp_path) if $tmpdb_exists;

        if ($livedb_exists) {
            # Livedb's contents become the starting contents of tmpdb, regardless
            # of whether we're incremental or not.
            &FileUtils::copyFiles($live_path, $tmp_path);
            # tmpdb exists now (we just copied it), so no first-time init needed
        }
        else {
            # Neither db is left: same situation as importing for the very first
            # time, so work with a new, empty tmpdb.
            $initdb = 1;

            # When the oai db is created from scratch it's like incremental and
            # "keepold" (only add, don't reprocess existing), so there is no need
            # for the special passes dealing with "provisional deletes".
            $self->{'do_pd_step'} = NO_PD_STEP;
        }
    }
    # Otherwise (!$removeold and tmpdb exists): we've been importing (perhaps
    # followed by building) repeatedly without activating, and want to keep the
    # incremental changes already in tmpdb. We just top up the current tmpdb.

    # final param follows jmt's $perform_firsttime_init in inexport.pm
    $self->{'oaidb_file_path'} = &dbutil::get_infodb_file_path($infodbtype, "oai-inf-tmp", $self->{'etc_dir'}, $initdb);
}
198
# Stores $mode (expected to be NO_PD_STEP or DO_PD_STEP) as the current
# processing mode, overriding whatever init_tmpdb worked out.
sub set_proc_mode {
    my ($self, $mode) = @_;

    $self->{'do_pd_step'} = $mode;
}
205
# Returns the current processing mode:
#   1 (DO_PD_STEP) if doing the PD step (marking entries as provisionally deleted),
#   0 (NO_PD_STEP) if not, which is the case when purely incremental or building first time.
sub get_proc_mode {
    my ($self) = @_;

    return $self->{'do_pd_step'};
}
212
# Returns the path of the oai db file this object is currently working with.
sub get_filepath {
    my ($self) = @_;

    return $self->{'oaidb_file_path'};
}
217
# Import-phase entry point: prepares the tmpdb, loads it into memory and, when a
# PD pass is required, marks every existing (E) entry as provisionally deleted
# (PD) and writes the result back to the db file.
#   $removeold, $have_manifest - passed straight through to init_tmpdb()
sub import_stage {
    my ($self, $removeold, $have_manifest) = @_;

    $self->init_tmpdb($removeold, $have_manifest);

    # init_tmpdb() decides this: true if we're doing full rebuilding and it's NOT
    # the first time the oai-inf db gets created.
    my $pd_pass_needed = $self->{'do_pd_step'};

    $self->load_info();
    $self->print_info(); # DEBUGGING

    if ($pd_pass_needed) {
        $self->mark_all_existing_as_provisionallydeleted();
        $self->print_info(); # DEBUGGING

        # done: persist the PD markings to the db file
        $self->save_info();
    }
}
241
# Build-phase entry point, to be run before the indexing step: brings the oai db
# in line with the I/R/D/B statuses recorded in the archive info db, then turns
# any remaining provisionally-deleted (PD) entries into deleted (D) ones and saves.
#   $archivedir - directory containing the archiveinf-doc db
#
# Note: declared without the former empty "()" prototype. Prototypes are ignored
# on method calls, and an empty one wrongly advertises "takes no arguments".
sub building_stage_before_indexing {
    my $self = shift (@_);
    my ($archivedir) = @_;

    # load archive info db into memory
    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($self->{'infodbtype'}, "archiveinf-doc", $archivedir);
    # NOTE(review): the src filename is never used below; call kept as in the original.
    my $arcinfo_src_filename = &dbutil::get_infodb_file_path($self->{'infodbtype'}, "archiveinf-src", $archivedir);
    my $archive_info = arcinfo->new($self->{'infodbtype'});
    $archive_info->load_info ($arcinfo_doc_filename);

    # load the oaidb file's contents into memory.
    $self->load_info();
    $self->print_info(); # DEBUGGING

    # process all the index, reindex and delete operations as indicated in arcinfo,
    # all the while ensuring all PDs are changed back to E for OIDs that exist in both
    # arcinfo and oaiinfo db.

    # Explicit %{...} dereference: "keys $hashref" is a compile error from Perl 5.24 on.
    foreach my $OID (keys %{$archive_info->{'info'}}) {
        my $arcinf_tuple = $archive_info->{'info'}->{$OID};
        # use packageName::constant to refer to constants declared in another package,
        # see http://perldoc.perl.org/constant.html
        my $indexing_status = $arcinf_tuple->[arcinfo::INFO_STATUS_INDEX];

        print STDERR "######## OID: $OID - status: $indexing_status\n";

        if($indexing_status eq "I") {
            $self->index($OID); # add new as E with current timestamp/or set existing as E with orig timestamp
        } elsif($indexing_status eq "R") {
            $self->reindex($OID); # update timestamp and ensure marked as E (if oid doesn't exist, add new)
        } elsif($indexing_status eq "D") {
            $self->delete($OID); # set as D with current timestamp
        } elsif($indexing_status eq "B") { # B for "been indexed"
            $self->been_indexed($OID); # will flip any PD to E if oid exists, else will add new entry for oid
            # A new entry may be required if the collection had been built prior to turning this into
            # an oaicollection. But what if we always maintain an oaidb? Still call $self->index() here.
        } else {
            print STDERR "### oaiinfo::building_stage_before_indexing(): Unrecognised indexing status $indexing_status\n";
        }
    }

    # once all docs processed, go through oaiinfo db changing any PDs to D along with current timestamp
    # to indicate that they're deleted
    $self->mark_all_provisionallydeleted_as_deleted();
    $self->print_info();

    # let's save to db file now that we're done
    $self->save_info();
}
294
# Makes the tmp db the live db: if a tmpdb exists, any existing livedb is first
# renamed to "<livedb>.bak" and then the tmpdb is renamed to the livedb path
# (dbutil renames the db file and any assoc files). Without a tmpdb, only a
# message is printed.
sub activate_collection { # move tmp db to live db
    my ($self) = @_;

    my $infodbtype = $self->{'infodbtype'};
    my $live_path = $self->{'oaidb_live_filepath'};
    my $tmp_path = $self->{'oaidb_tmp_filepath'};

    my $have_live = &FileUtils::fileExists($live_path);
    my $have_tmp = &FileUtils::fileExists($tmp_path);

    if (!$have_tmp) {
        print STDERR "@@@@@ In oaiinfo::activate_collection():\n";
        print STDERR "@@@@@ No tmpdb at $self->{'oaidb_tmp_filepath'}\n";
        print STDERR "@@@@@ to make 'live' by moving to $self->{'oaidb_live_filepath'}.\n";
    }
    else {
        # keep the previous live db around as a backup rather than deleting it
        &dbutil::rename_db_file_to($infodbtype, $live_path, $live_path.".bak") if $have_live;

        &dbutil::rename_db_file_to($infodbtype, $tmp_path, $live_path);

        print STDERR "#### Should now have MOVED $self->{'oaidb_tmp_filepath'} to $self->{'oaidb_live_filepath'}\n";
    }
}
320
321############################### FOR FULL AND INCR BUILDING ##########################
# For an OID that is new or already known:
#  - unknown OID: added with status=E and the current timestamp (this is what
#    happens for every OID on a first build);
#  - known OID, incremental (NO_PD_STEP): timestamp updated to current, status untouched;
#  - known OID, full build (DO_PD_STEP): status (re-)set to E, timestamp kept.
sub Xprocess_new_or_existing {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        # not present yet: it's new, and exists from the current time on
        $self->set_info($OID, "E", $self->get_current_time());
    }
    elsif ($self->{'do_pd_step'} == NO_PD_STEP) {
        # update modification time for reindexed doc
        $entry->[INFO_TIMESTAMP_INDEX] = $self->get_current_time();
    }
    else {
        # mode is DO_PD_STEP: flips any PD status of an existing document back
        # to E while keeping its existing timestamp. Not called on deleted docs,
        # so the status was never D.
        $entry->[INFO_STATUS_INDEX] = "E";
    }
}
348
349##################### SPECIFIC TO PD-STEP ####################
350
351
# Marks all existing, E (non-deleted) OIDs as Provisionally Deleted (PD).
# Entries with any other status are left alone.
# Only the in-memory map is changed: the caller should call save_info when they
# want to save to the db.
sub mark_all_existing_as_provisionallydeleted {
    my $self = shift (@_);

    print STDERR "@@@@@ oaiinfo::mark_all_E_as_PD(): Marking the E entries as PD\n";

    # Explicit %{...} dereference: "keys $hashref" is a compile error from Perl 5.24 on.
    foreach my $OID (keys %{$self->{'info'}}) {
        my $curr_status = $self->get_status_info($OID);
        if(defined $curr_status && $curr_status eq "E") {
            $self->set_status_info($OID, "PD");
        }
    }
}
367
# Marks all OIDs that are Provisionally Deleted (PD) as deleted (D), and sets
# their timestamp to the current time. Entries with any other status are left alone.
# To be called at end of build. Only the in-memory map is changed: the caller
# should save to the db by calling save_info.
sub mark_all_provisionallydeleted_as_deleted {
    my $self = shift (@_);

    print STDERR "@@@@@ oaiinfo::mark_all_PD_as_D(): Marking the PD entries as D\n";

    # Explicit %{...} dereference: "keys $hashref" is a compile error from Perl 5.24 on.
    foreach my $OID (keys %{$self->{'info'}}) {
        my $curr_status = $self->get_status_info($OID);
        if(defined $curr_status && $curr_status eq "PD") {
            $self->set_info($OID, "D", $self->get_current_time());
        }
    }
}
382
# Sets the status of $OID to E (existing), provided the OID is present.
# Delegates to set_status_info(), which only sets the status for a known OID.
sub Xset_status_to_existing_if_OID_present {
    my ($self, $OID) = @_;

    $self->set_status_info($OID, "E");
}
390
391##################### GENERAL, NOT SPECIFIC TO PD-STEP ####################
392
# Debugging aid: dumps every in-memory entry (OID, status, timestamp) to STDERR,
# framed by two separator lines. Entries are printed in hash order.
sub print_info {
    my $self = shift (@_);

    print STDERR "###########################################################\n";
    print STDERR "@@@@@ oaiinfo::print_info(): oaidb in memory contains: \n";

    # Explicit %{...} dereference: "keys $hashref" is a compile error from Perl 5.24 on.
    foreach my $OID (keys %{$self->{'info'}}) {
        my $OID_info = $self->{'info'}->{$OID};
        print STDERR "OID: $OID";
        print STDERR " status: " . $OID_info->[INFO_STATUS_INDEX];
        print STDERR " time: " . $OID_info->[INFO_TIMESTAMP_INDEX];
        print STDERR "\n";
    }

    print STDERR "###########################################################\n";
}
408
409
# Called for a document that is to be indexed.
# If the OID is already known, its status is (re)set to E for existing and its
# timestamp is left alone. Otherwise a new entry is added with status=E and the
# current timestamp.
sub index {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        # unknown OID: it exists from the current time on
        $self->set_info($OID, "E", $self->get_current_time());
    }
    else {
        # known OID: changes the status back to E, timestamp unchanged
        $entry->[INFO_STATUS_INDEX] = "E";
    }
}
425
# Upon reindexing a document with identifier OID, (re)sets its entry to status=E
# with the current time as timestamp. A single set_info() call takes care of 3 things:
#  - if OID exists, updates modified time to indicate the doc has been reindexed;
#  - if OID exists, ensures any status=PD is flipped back to E (as we know the doc exists);
#  - if the OID doesn't yet exist (unlikely), adds a new entry with status=E and current timestamp.
sub reindex {
    my $self = shift (@_);
    my ($OID) = @_;

    # (the unused lookup of the existing entry has been dropped)
    $self->set_info($OID, "E", $self->get_current_time());
}
439
440
# Called for a document whose archive status is "been indexed". Simply does the
# same as index().
# Rationale: OIDs that have been indexed upon rebuild may still be new to the
# oaidb. GS2 collections are not OAI collections by default, unlike GS3
# collections: a (GS2) collection may be rebuilt several times before being set
# up as an OAI collection, in which case its doc OIDs may not be in the oaidb
# yet. Unless, that is, an oaidb is always maintained regardless of whether the
# collection has OAI support turned on (as is the present case).
sub been_indexed {
    my ($self, $OID) = @_;

    return $self->index($OID);
}
453
# Upon deleting a document with identifier OID, records it with status D
# (deleted) and the current time as timestamp.
sub delete {
    my ($self, $OID) = @_;

    # set_info() would fall back to the current time by itself if no timestamp
    # were given; passing it explicitly makes the intent easier to follow.
    my $deleted_at = $self->get_current_time();
    $self->set_info($OID, "D", $deleted_at);
}
465
466#############################################################
# Returns the current time as given by Perl's time(), i.e. the raw value rather
# than any of the formatted variants localtime() would produce.
sub get_current_time {
    my ($self) = @_;

    my $now = time();
    return $now;
}
475
# Loads entries from a tab-separated text file, one entry per line, and adds them
# to the in-memory map through add_info(). The fields of a line are passed on in
# file order, so they are expected in add_info()'s order: OID, deletion status,
# timestamp. Lines with fewer than 2 fields are skipped.
# Does nothing if $filename is undefined or the file doesn't exist; dies if the
# file exists but cannot be opened.
sub _load_info_txt
{
    my $self = shift (@_);
    my ($filename) = @_;

    if (defined $filename && &FileUtils::fileExists($filename)) {
        # 3-arg open with a lexical handle: the filename can no longer be
        # misread as an open mode, and no global bareword handle is clobbered.
        open (my $infile, '<', $filename) ||
            die "oaiinfo::load_info couldn't read $filename\n";

        while (defined (my $line = <$infile>)) {
            $line =~ s/\cM|\cJ//g; # remove end-of-line characters
            my @lineparts = split ("\t", $line);
            if (scalar(@lineparts) >= 2) {
                $self->add_info (@lineparts);
            }
        }
        close ($infile);
    }
}
498
# Loads entries from the info db file $filename: dbutil reads the db into a map
# of oid => raw value, and from each raw value the "<status>..." and
# "<timestamp>..." lines are extracted and handed to add_info().
sub _load_info_db
{
    my $self = shift (@_);
    my ($filename) = @_;

    my $infodb_map = {};

    &dbutil::read_infodb_file($self->{'infodbtype'}, $filename, $infodb_map);

    # Explicit %$... dereference: "keys $hashref" is a compile error from Perl 5.24 on.
    foreach my $oid ( keys %$infodb_map ) {
        my $vals = $infodb_map->{$oid};
        # interested in oid, timestamp, deletion status

        # /m so that ^ and $ match at the line boundaries inside the multi-line value
        my ($deletion_status) = ($vals=~/^<status>(.*)$/m);
        my ($timestamp) = ($vals=~/^<timestamp>(.*)$/m);

        $self->add_info ($oid, $deletion_status, $timestamp);
    }
}
518
# (Re)loads the in-memory map from file. The map is always emptied first, even
# when nothing ends up being loaded.
# If no filename is passed in (and you don't generally want to pass one), the db
# file this object is currently working with is used. A file that doesn't exist
# is silently skipped. Files ending in ".inf" are read as text, anything else as
# an info db.
sub load_info {
    my ($self, $filename) = @_;

    $self->{'info'} = {};

    if (!defined $filename) {
        $filename = $self->{'oaidb_file_path'};
    }

    if (&FileUtils::fileExists($filename)) {
        my $is_text_file = ($filename =~ m/\.inf$/);
        if ($is_text_file) {
            $self->_load_info_txt($filename);
        }
        else {
            $self->_load_info_db($filename);
        }
    }
}
539
# Writes the in-memory entries to the text file $filename (overwriting it), one
# line per entry as produced by get_OID_list(): the entry's fields joined by tabs.
# Dies if the file cannot be opened for writing or cannot be closed cleanly.
sub _save_info_txt {
    my $self = shift (@_);
    my ($filename) = @_;

    # 3-arg open with a lexical handle: the filename can no longer be misread
    # as part of the open mode, and no global bareword handle is clobbered.
    open (my $outfile, '>', $filename) ||
        die "oaiinfo::save_info couldn't write $filename\n";

    foreach my $info (@{$self->get_OID_list()}) {
        if (defined $info) {
            print $outfile join("\t", @$info), "\n";
        }
    }

    # buffered write errors only surface on close, so don't ignore its result
    close ($outfile) ||
        die "oaiinfo::save_info couldn't write $filename\n";
}
556
# Writes the in-memory entries to an info db file through dbutil.
# If no filename is passed in (and you don't generally want to pass one), the db
# file this object is currently working with is used.
# Each entry is written as a raw value of the form
#   <status>STATUS\n<timestamp>TIMESTAMP\n
# which is the format _load_info_db() parses.
sub _save_info_db {
    my $self = shift (@_);
    my ($filename) = @_;

    $filename = $self->{'oaidb_file_path'} unless defined $filename;
    my $infodbtype = $self->{'infodbtype'};

    # Write out the data structure $self->{'info'} that's been maintaining the data in-memory.
    my $infodb_handle = &dbutil::open_infodb_write_handle($infodbtype, $filename);

    # Explicit %{...} dereference: "keys $hashref" is a compile error from Perl 5.24 on.
    foreach my $oid ( keys %{$self->{'info'}} ) {
        my $OID_info = $self->{'info'}->{$oid};

        my $val = "<status>".$OID_info->[INFO_STATUS_INDEX]."\n<timestamp>".$OID_info->[INFO_TIMESTAMP_INDEX]."\n";
        &dbutil::write_infodb_rawentry($infodbtype,$infodb_handle,$oid,$val);
    }
    &dbutil::close_infodb_write_handle($infodbtype, $infodb_handle);
}
583
# Saves the in-memory entries.
#   $filename - (optional) where to save to. Without it, the entries go to the
#               info db file this object is working with. With it, a name that
#               contains "contents" or ends in ".inf" is written as a text file,
#               anything else as an info db.
sub save_info {
    my ($self, $filename) = @_;

    if (!defined $filename) {
        $self->_save_info_db();
    }
    elsif ($filename =~ m/(contents)|(\.inf)$/) {
        $self->_save_info_txt($filename);
    }
    else {
        $self->_save_info_db($filename);
    }
}
599
# Deliberately a no-op apart from an error message: entries are never removed
# from the oai db. (Use delete() to mark a document as deleted instead.)
sub delete_info {
    my ($self, $OID) = @_;

    print STDERR "@@@@ ERROR oaiinfo::delete_info: Not allowed to delete entries in oai DB.\n";
}
610
# Sets the entry for $OID to [$del_status, $timestamp], replacing an existing
# entry or adding a new one. Without a $timestamp the current time is used.
sub set_info {
    my ($self, $OID, $del_status, $timestamp) = @_;

    $timestamp = $self->get_current_time() unless defined $timestamp;

    $self->{'info'}->{$OID} = [$del_status, $timestamp];
}
620
# Adds an entry for $OID if and only if there isn't one already. An existing
# entry is left untouched and an error is printed instead.
sub add_info {
    my ($self, $OID, $del_status, $timestamp) = @_;

    if (!defined($self->{'info'}->{$OID})) {
        $self->set_info($OID, $del_status, $timestamp);
    }
    else {
        print STDERR "@@@@ ERROR oaiinfo::add_info: id $OID already exists. Not adding.\n";
    }
}
631
# Changes the deletion status of an existing entry; its timestamp is untouched.
# For an unknown OID nothing is added, only a message is printed.
sub set_status_info {
    my ($self, $OID, $del_status) = @_;

    my $entry = $self->{'info'}->{$OID};
    unless (defined $entry) {
        print STDERR "oaiinfo::set_status_info: Unable to find OAI document id $OID\n";
        return 1; # what the print above evaluates to on success
    }

    $entry->[INFO_STATUS_INDEX] = $del_status;
}
644
# Returns the deletion status stored for $OID, or undef (after printing a
# message) if there is no entry for that OID.
sub get_status_info {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        print STDERR "oaiinfo::get_status_info: Unable to find OAI document id $OID\n";
        return undef;
    }

    my $del_status = $entry->[INFO_STATUS_INDEX];
    return $del_status;
}
661
# Changes the timestamp of an existing entry; its status is untouched.
# If no timestamp is provided, the current time is used.
# For an unknown OID nothing is added, only a message is printed.
sub set_timestamp_info {
    my ($self, $OID, $timestamp) = @_;

    my $entry = $self->{'info'}->{$OID};
    unless (defined $entry) {
        print STDERR "oaiinfo::set_timestamp_info: Unable to find OAI document id $OID\n";
        return 1; # what the print above evaluates to on success
    }

    $timestamp = $self->get_current_time() if !defined $timestamp;
    $entry->[INFO_TIMESTAMP_INDEX] = $timestamp;
}
678
# Returns the timestamp stored for $OID, or undef (after printing a message) if
# there is no entry for that OID.
sub get_timestamp_info {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        print STDERR "oaiinfo::get_timestamp_info: Unable to find OAI document id $OID\n";
        return undef;
    }

    my $timestamp = $entry->[INFO_TIMESTAMP_INDEX];
    return $timestamp;
}
695
696
# Returns a reference to a list of the form [[OID, deletion_status, timestamp], ...]
# with one element per in-memory entry, in hash (i.e. no particular) order.
# Note the element order: status comes before timestamp, as in the entries themselves.
sub get_OID_list
{
    my $self = shift (@_);

    my $info = $self->{'info'};

    # Explicit %$... dereference: "keys $hashref" is a compile error from Perl 5.24 on.
    my @list = map {
        [ $_, $info->{$_}->[INFO_STATUS_INDEX], $info->{$_}->[INFO_TIMESTAMP_INDEX] ]
    } keys %$info;

    return \@list;
}
713
714
715
# Returns the entry stored for $OID, which is a reference to an array of the form
# [deletion_status, timestamp] (the stored array itself, not a copy), or undef if
# there is no entry for that OID.
sub get_info {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    return undef unless defined $entry;
    return $entry;
}
727
728
729
# Returns the number of entries so far, including deleted ones.
# http://stackoverflow.com/questions/1109095/how-can-i-find-the-number-of-keys-in-a-hash-in-perl
sub size {
    my $self = shift (@_);

    # Explicit %{...} dereference: "keys $hashref" is a compile error from Perl 5.24 on.
    return scalar(keys %{$self->{'info'}});
}
736
7371;
Note: See TracBrowser for help on using the repository browser.