source: main/trunk/greenstone2/perllib/oaiinfo.pm@ 31191

Last change on this file since 31191 was 31191, checked in by ak19, 7 years ago

Correction to previous commit.

File size: 25.7 KB
Line 
1# This class is based on arcinfo.pm
package oaiinfo;

# Positions of the two fields inside each per-OID tuple kept in $self->{'info'}.
use constant {
    INFO_STATUS_INDEX    => 0,
    INFO_TIMESTAMP_INDEX => 1,
};

# Values held in $self->{'do_pd_step'}.
use constant {
    NO_PD_STEP => 0,    # no need to mark db entries as "provisionally deleted": no PD pass
    DO_PD_STEP => 1,    # PD pass required (a pass to mark db entries "PD", undone again later)
};

use strict;

use arcinfo;
use dbutil;
15
16# QUESTIONS:
17# Should we use time or localtime(time) for timestamp? Just timestamp.
18# What format should the timestamp be in, or is the basic format used by perl sufficient? Basic.
19
20# File format read in: OID <tab> Deletion-Status <tab> Date-timestamp
21
22# Deletion status can be:
23# E = Doc with OID exists (has not been deleted from collection). Timestamp indicates last time of build
24# D = Doc with OID has been deleted. Timestamp indicates time of deletion
25# PD = Provisionally Deleted. Timestamp momentarily unaltered.
26
27# oaidb is "always incremental": always reflects the I/B/R/D status of archive info db,
28# before the indexing step of the build phase that alters the I/B/R/D contents of archive info db.
29# (I=index, B=been indexed, R=reindex; D=delete)
30
# Constructor.
#   $config_filename - a file path; its parent folder is used as the folder that holds
#                      both the live ("oai-inf") and the tmp ("oai-inf-tmp") oai db
#   $infodbtype      - optional; when undefined, dbutil's default infodb type is used.
#                      "gdbm-txtgz" is mapped to plain "gdbm".
# Nothing is loaded from disk here. The object starts out with the tmp db as the
# db file it works with ('oaidb_file_path').
sub new {
    my ($class, $config_filename, $infodbtype) = @_;

    $infodbtype = &dbutil::get_default_infodb_type() unless defined $infodbtype;
    if ($infodbtype eq "gdbm-txtgz") {
        $infodbtype = "gdbm";
    }

    # Work out the filenames of the two dbs (tmp and live) we'll be dealing with
    my $etc_dir = &util::get_parent_folder($config_filename);

    my $perform_firsttime_init = 0;
    my $live_path = &dbutil::get_infodb_file_path($infodbtype, "oai-inf", $etc_dir, $perform_firsttime_init);
    my $tmp_path  = &dbutil::get_infodb_file_path($infodbtype, "oai-inf-tmp", $etc_dir, $perform_firsttime_init);

    my $self = {
        'info'                => {},             # map of OID => [deletion-status, timestamp]
        'infodbtype'          => $infodbtype,
        'oaidb_live_filepath' => $live_path,
        'oaidb_tmp_filepath'  => $tmp_path,
        'etc_dir'             => $etc_dir,
        'oaidb_file_path'     => $tmp_path,      # the db file we're working with
    };

    return bless $self, $class;
}
59
# Works out the starting contents of the tmp oai db (oai-inf-tmp) and puts it in that
# state: empty, a copy of the live db, or whatever the existing tmp db already holds.
#
#   $removeold     - true when a full rebuild was requested
#   $have_manifest - true when a manifest file is in use. This overrides $removeold:
#                    with a manifest we behave as fully incremental for the oai db.
#
# With LDB = live db exists, TDB = tmp db exists, RO = (effective) removeold:
#
#   LDB TDB RO | Outcome
#   -----------+--------------------------------------------
#    0   0   0 | clean slate (new, empty tmp db)
#    0   0   1 | clean slate
#    0   1   0 | top up the existing tmp db
#    0   1   1 | erase tmp db, then clean slate
#    1   0   0 | copy live db to tmp db
#    1   0   1 | copy live db to tmp db
#    1   1   0 | top up the existing tmp db
#    1   1   1 | copy live db to tmp db (replacing the old tmp db)
#
# which reduces to:
#   clean slate       = !LDB && (RO || !TDB)
#   copy live to tmp  =  LDB && (RO || !TDB)
#   top up tmp db     =  TDB && !RO
#
# Stores the decision about the PD ("provisionally deleted") pass in
# $self->{'do_pd_step'} and returns it (DO_PD_STEP or NO_PD_STEP).
sub init_tmpdb {
    my ($self, $removeold, $have_manifest) = @_;

    # removeold only counts as a proper full rebuild when there is no manifest file
    my $full_rebuild = ($removeold && !$have_manifest) ? 1 : 0;

    $self->{'removeold_no_manifest'} = $full_rebuild;
    # On a full rebuild every E entry will first be marked PD
    $self->{'do_pd_step'} = $full_rebuild ? DO_PD_STEP : NO_PD_STEP;

    my $infodbtype = $self->{'infodbtype'};
    my $live_path  = $self->{'oaidb_live_filepath'};
    my $tmp_path   = $self->{'oaidb_tmp_filepath'};

    # Note: the live db can only exist if the collection has been activated at least once before
    my $livedb_exists = &FileUtils::fileExists($live_path);
    my $tmpdb_exists  = &FileUtils::fileExists($tmp_path);

    # Passed on as the final argument of get_infodb_file_path() below:
    # 1 only when we start off with a brand new tmp db
    my $initdb = 0;

    if (!$tmpdb_exists || $full_rebuild) {
        # Either there is no tmp db yet, or a full rebuild means its contents are unwanted.
        # Remove the db file and any associated files before starting afresh.
        &dbutil::remove_db_file($infodbtype, $tmp_path) if $tmpdb_exists;

        if ($livedb_exists) {
            # The live db's contents become the starting contents of the tmp db.
            # The tmp db then exists already, so it needs no first-time init.
            &FileUtils::copyFiles($live_path, $tmp_path);
        }
        else {
            # Neither db is left: same situation as importing for the very first time.
            $initdb = 1;

            # Creating the oai db for the first time is like an incremental "keepold"
            # build (only add, don't reprocess existing), so no provisional-delete pass.
            $self->{'do_pd_step'} = NO_PD_STEP;
        }
    }
    # Otherwise we're incremental and a tmp db already exists (regardless of whether a
    # live db exists): importing was repeated without activating in between, and the
    # changes accumulated in the tmp db are kept and topped up.

    # final param follows $perform_firsttime_init as used in new()
    $self->{'oaidb_file_path'} = &dbutil::get_infodb_file_path($infodbtype, "oai-inf-tmp", $self->{'etc_dir'}, $initdb);

    return $self->{'do_pd_step'};
}
199
# Records whether the PD (provisionally deleted) pass is to be done.
#   $mode - NO_PD_STEP or DO_PD_STEP
sub set_proc_mode {
    my ($self, $mode) = @_;

    $self->{'do_pd_step'} = $mode;
}
206
# Returns the stored processing mode:
#   1 when the PD step is to be done (marking entries as provisionally deleted),
#   0 when it is not, which is the case when purely incremental or building first time.
sub get_proc_mode {
    my ($self) = @_;

    return $self->{'do_pd_step'};
}
213
# Returns the path of the oai db file this object currently works with.
sub get_filepath {
    my ($self) = @_;

    return $self->{'oaidb_file_path'};
}
218
# Import-phase entry point: prepares the tmp db, loads it into memory and, when a
# PD pass is called for, marks every existing (E) entry as provisionally deleted (PD)
# and writes the result back to the db file.
#   $removeold, $have_manifest - handed straight to init_tmpdb()
sub import_stage {
    my ($self, $removeold, $have_manifest) = @_;

    # True if the step to mark oaidb entries as PD is required: we're doing a full
    # rebuild and it's NOT the first time the oai db is being created.
    my $pd_pass_needed = $self->init_tmpdb($removeold, $have_manifest);

    $self->load_info();
    $self->print_info(); # DEBUGGING

    if ($pd_pass_needed) {
        $self->mark_all_existing_as_provisionallydeleted();
        $self->print_info(); # DEBUGGING

        # persist the PD markings now that we're done
        $self->save_info();
    }
}
242
# Build-phase entry point, to be run before the indexing step alters the archive info db.
# Brings the oai db in line with the I/B/R/D status of every document in the archive
# info db, then turns any entry still marked PD into D and saves the oai db.
#   $archivedir - folder containing the archiveinf-doc / archiveinf-src dbs
sub building_stage_before_indexing {
    my $self = shift (@_);
    my ($archivedir) = @_;

    my $infodbtype = $self->{'infodbtype'};

    # load archive info db into memory
    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archivedir);
    # NOTE(review): the src db path is not used below; the call is kept as in the
    # original in case get_infodb_file_path() has side effects - confirm and remove.
    my $arcinfo_src_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-src", $archivedir);
    my $archive_info = arcinfo->new($infodbtype);
    $archive_info->load_info ($arcinfo_doc_filename);

    # load the oaidb file's contents into memory.
    $self->load_info();
    $self->print_info(); # DEBUGGING

    # process all the index, reindex and delete operations as indicated in arcinfo,
    # all the while ensuring all PDs are changed back to E for OIDs that exist in both
    # arcinfo and oaiinfo db.

    # keys needs an explicit hash dereference: applying it to a hash reference
    # expression is a compile error from perl 5.24 onwards.
    foreach my $OID (keys %{$archive_info->{'info'}}) {
        my $arcinf_tuple = $archive_info->{'info'}->{$OID};
        # packageName::constant refers to a constant declared in another package
        my $indexing_status = $arcinf_tuple->[arcinfo::INFO_STATUS_INDEX];

        print STDERR "######## OID: $OID - status: $indexing_status\n";

        if($indexing_status eq "I") {
            $self->index($OID); # add new as E with current timestamp/or set existing as E with orig timestamp
        } elsif($indexing_status eq "R") {
            $self->reindex($OID); # update timestamp and ensure marked as E (if oid doesn't exist, add new)
        } elsif($indexing_status eq "D") {
            $self->delete($OID); # set as D with current timestamp
        } elsif($indexing_status eq "B") { # B for "been indexed"
            # flips any PD to E if the oid exists, else adds a new entry for the oid
            # (needed if the collection was built before it had an oai db)
            $self->been_indexed($OID);
        } else {
            print STDERR "### oaiinfo::building_stage_before_indexing(): Unrecognised indexing status $indexing_status\n";
        }
    }

    # once all docs are processed, anything still marked PD was not seen in arcinfo:
    # change it to D with the current timestamp to indicate that it's deleted
    $self->mark_all_provisionallydeleted_as_deleted();
    $self->print_info();

    # let's save to db file now that we're done
    $self->save_info();
}
295
# Makes the tmp db the live db by renaming it. An existing live db is first renamed
# to "<live db path>.bak". When there is no tmp db, only a message is printed.
sub activate_collection {
    my ($self) = @_;

    my $live_path = $self->{'oaidb_live_filepath'};
    my $tmp_path  = $self->{'oaidb_tmp_filepath'};

    my $livedb_exists = &FileUtils::fileExists($live_path);
    my $tmpdb_exists  = &FileUtils::fileExists($tmp_path);

    if (!$tmpdb_exists) {
        print STDERR "@@@@@ In oaiinfo::activate_collection():\n";
        print STDERR "@@@@@ No tmpdb at $self->{'oaidb_tmp_filepath'}\n";
        print STDERR "@@@@@ to make 'live' by moving to $self->{'oaidb_live_filepath'}.\n";
    }
    else {
        # keep the previous live db (and any associated files) around as a backup
        &dbutil::rename_db_file_to($self->{'infodbtype'}, $live_path, $live_path.".bak") if $livedb_exists;

        # rename the tmp db file and any associated files to the live name
        &dbutil::rename_db_file_to($self->{'infodbtype'}, $tmp_path, $live_path);

        print STDERR "#### Should now have MOVED $self->{'oaidb_tmp_filepath'} to $self->{'oaidb_live_filepath'}\n";
    }
}
321
322############################### FOR FULL AND INCR BUILDING ##########################
# Handles an OID that is either new to the oai db or already in it:
#  - unknown OID: added with status E and the current time (this is what happens
#    on a first build, since no OID is present yet);
#  - known OID, no PD step (incremental): only its timestamp is set to the current time;
#  - known OID, PD step (full build): only its status is set back to E, the
#    existing timestamp is kept.
sub Xprocess_new_or_existing {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        # not present, so it's new and exists from the current time on
        $self->set_info($OID, "E", $self->get_current_time());
    }
    elsif ($self->{'do_pd_step'} == NO_PD_STEP) {
        # update modification time for reindexed doc
        $entry->[INFO_TIMESTAMP_INDEX] = $self->get_current_time();
    }
    else {
        # mode is DO_PD_STEP: flip any PD status of an existing document back to E
        $entry->[INFO_STATUS_INDEX] = "E";
    }
}
349
350##################### SPECIFIC TO PD-STEP ####################
351
352
# Marks every OID whose status is E (existing, i.e. not deleted) as PD
# (Provisionally Deleted). Timestamps are left untouched.
# Only the in-memory map is changed: the caller should call save_info when they
# want the change written to the db.
sub mark_all_existing_as_provisionallydeleted {
    my $self = shift (@_);

    print STDERR "@@@@@ oaiinfo::mark_all_E_as_PD(): Marking the E entries as PD\n";

    # keys needs an explicit hash dereference: applying it to a hash reference
    # expression is a compile error from perl 5.24 onwards.
    foreach my $OID (keys %{$self->{'info'}}) {
        my $curr_status = $self->get_status_info($OID);
        if(defined $curr_status && $curr_status eq "E") {
            $self->set_status_info($OID, "PD");
        }
    }
}
368
# Marks every OID that is still Provisionally Deleted (PD) as deleted (D) and sets
# its timestamp to the current time. To be called at the end of a build.
# Only the in-memory map is changed: the caller should save to the db by calling save_info.
sub mark_all_provisionallydeleted_as_deleted {
    my $self = shift (@_);

    print STDERR "@@@@@ oaiinfo::mark_all_PD_as_D(): Marking the PD entries as D\n";

    # keys needs an explicit hash dereference: applying it to a hash reference
    # expression is a compile error from perl 5.24 onwards.
    foreach my $OID (keys %{$self->{'info'}}) {
        my $curr_status = $self->get_status_info($OID);
        if(defined $curr_status && $curr_status eq "PD") {
            $self->set_info($OID, "D", $self->get_current_time());
        }
    }
}
383
# Sets the status of $OID to E (existing). set_status_info() only changes the
# status when the OID is already in the map.
sub Xset_status_to_existing_if_OID_present {
    my ($self, $OID) = @_;

    $self->set_status_info($OID, "E");
}
391
392##################### GENERAL, NOT SPECIFIC TO PD-STEP ####################
393
# Debugging aid: prints every in-memory entry (OID, status, timestamp) to STDERR,
# one entry per line, framed by separator lines.
sub print_info {
    my $self = shift (@_);

    print STDERR "###########################################################\n";
    print STDERR "@@@@@ oaiinfo::print_info(): oaidb in memory contains: \n";

    # keys needs an explicit hash dereference: applying it to a hash reference
    # expression is a compile error from perl 5.24 onwards.
    foreach my $OID (keys %{$self->{'info'}}) {
        my $OID_info = $self->{'info'}->{$OID};
        print STDERR "OID: $OID";
        print STDERR " status: " . $OID_info->[INFO_STATUS_INDEX];
        print STDERR " time: " . $OID_info->[INFO_TIMESTAMP_INDEX];
        print STDERR "\n";
    }

    print STDERR "###########################################################\n";
}
409
410
# Registers $OID as an existing document.
# If the OID is not in the map yet, it is added with status E and the current time.
# If it is already there, only its status is set to E; its timestamp is left alone.
sub index {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        # not present: added as existing from the current time on
        $self->set_info($OID, "E", $self->get_current_time());
    }
    else {
        # present: change status (back) to E, timestamp unchanged
        $entry->[INFO_STATUS_INDEX] = "E";
    }
}
426
# Upon reindexing a document with identifier $OID, its entry is (re)written with
# status E and the current time. That one set_info() call covers three cases:
#  - the OID exists: its timestamp is updated to show the doc has been reindexed;
#  - the OID exists with status PD: the status is flipped back to E;
#  - the OID doesn't exist yet (unlikely): a new entry is added.
sub reindex {
    my ($self, $OID) = @_;

    $self->set_info($OID, "E", $self->get_current_time());
}
440
441
# Handles a document whose archive info status is "been indexed". Simply delegates
# to index(): an OID that has already been indexed may still be new to the oai db,
# e.g. when a collection was rebuilt several times before an oai db was kept for it.
sub been_indexed {
    my ($self, $OID) = @_;

    $self->index($OID);
}
454
# Upon deleting a document with identifier $OID, its entry is set to status D
# with the current time as timestamp.
sub delete {
    my ($self, $OID) = @_;

    # set_info() would default to the current time anyway;
    # it is passed explicitly so the intent is obvious here
    my $now = $self->get_current_time();
    $self->set_info($OID, "D", $now);
}
466
467#############################################################
# Returns the current time as given by perl's time(), i.e. the basic
# (seconds since the epoch) format; localtime is deliberately not used.
sub get_current_time {
    my ($self) = @_;

    my $now = time;
    return $now;
}
476
# Reads a tab-separated text file into the in-memory map.
# Each line's fields are handed to add_info() in file order, i.e.
#   OID <tab> deletion-status <tab> timestamp
# Lines with fewer than 2 fields are skipped. Does nothing if $filename is
# undefined or the file doesn't exist; dies if the file can't be opened.
sub _load_info_txt
{
    my $self = shift (@_);
    my ($filename) = @_;

    if (defined $filename && &FileUtils::fileExists($filename)) {
        # 3-arg open with a lexical handle: the filename is taken literally (no mode
        # characters or surrounding whitespace get interpreted) and the handle can't
        # clash with a global INFILE used elsewhere.
        open (my $in_fh, '<', $filename) ||
            die "oaiinfo::load_info couldn't read $filename\n";

        while (defined (my $line = <$in_fh>)) {
            $line =~ s/\cM|\cJ//g; # remove end-of-line characters
            my @lineparts = split ("\t", $line);
            if (scalar(@lineparts) >= 2) {
                $self->add_info (@lineparts);
            }
        }
        close ($in_fh);
    }
}
499
# Reads an infodb file into the in-memory map. For each OID, the status and the
# timestamp are pulled out of the record's "<status>..." and "<timestamp>..." lines
# and handed to add_info().
sub _load_info_db
{
    my $self = shift (@_);
    my ($filename) = @_;

    my $infodb_map = {};

    &dbutil::read_infodb_file($self->{'infodbtype'}, $filename, $infodb_map);

    # keys needs an explicit hash dereference: applying it to a hash reference
    # is a compile error from perl 5.24 onwards.
    foreach my $oid ( keys %{$infodb_map} ) {
        my $vals = $infodb_map->{$oid};
        # interested in oid, timestamp, deletion status

        my ($deletion_status) = ($vals=~/^<status>(.*)$/m);
        my ($timestamp) = ($vals=~/^<timestamp>(.*)$/m);

        $self->add_info ($oid, $deletion_status, $timestamp);
    }
}
519
# Replaces the in-memory map with the contents of a db file.
#   $filename - optional (and generally not wanted); when undefined, the db file
#               this object works with ($self->{'oaidb_file_path'}) is loaded.
# A name ending in ".inf" is read as a text file, anything else as an infodb.
# If the file doesn't exist, the map is simply left empty.
sub load_info {
    my ($self, $filename) = @_;

    $self->{'info'} = {};

    if (!defined $filename) {
        $filename = $self->{'oaidb_file_path'};
    }

    if (&FileUtils::fileExists($filename)) {
        my $is_text_file = ($filename =~ m/\.inf$/);
        if ($is_text_file) {
            $self->_load_info_txt($filename);
        }
        else {
            $self->_load_info_db($filename);
        }
    }
}
540
# Writes the in-memory map to a text file, overwriting it. One line per entry,
# the fields of each get_OID_list() tuple joined by tabs:
#   OID <tab> deletion-status <tab> timestamp
# Dies if the file can't be opened for writing.
sub _save_info_txt {
    my $self = shift (@_);
    my ($filename) = @_;

    # 3-arg open with a lexical handle: the filename is taken literally (it can't
    # alter the open mode) and the handle can't clash with a global OUTFILE.
    open (my $out_fh, '>', $filename) ||
        die "oaiinfo::save_info couldn't write $filename\n";

    foreach my $info (@{$self->get_OID_list()}) {
        if (defined $info) {
            print {$out_fh} join("\t", @$info), "\n";
        }
    }
    close ($out_fh);
}
557
# Writes the in-memory map out to an infodb file.
#   $filename - optional (and generally not wanted); when undefined, the db file
#               this object works with ($self->{'oaidb_file_path'}) is written.
# Each OID is stored as a raw entry of the form
#   <status>STATUS\n<timestamp>TIMESTAMP\n
sub _save_info_db {
    my $self = shift (@_);
    my ($filename) = @_;

    $filename = $self->{'oaidb_file_path'} unless defined $filename;
    my $infodbtype = $self->{'infodbtype'};

    # Write out the data structure $self->{'info'} that's been maintaining the data in-memory.
    my $infodb_handle = &dbutil::open_infodb_write_handle($infodbtype, $filename);

    # keys needs an explicit hash dereference: applying it to a hash reference
    # expression is a compile error from perl 5.24 onwards.
    foreach my $oid ( keys %{$self->{'info'}} ) {
        my $OID_info = $self->{'info'}->{$oid};

        my $val = "<status>".$OID_info->[INFO_STATUS_INDEX]."\n<timestamp>".$OID_info->[INFO_TIMESTAMP_INDEX]."\n";
        &dbutil::write_infodb_rawentry($infodbtype,$infodb_handle,$oid,$val);
    }
    &dbutil::close_infodb_write_handle($infodbtype, $infodb_handle);
}
584
# Saves the in-memory map.
#   $filename - optional. Without it, the map goes to the db file this object works
#               with. With it, a name matching m/(contents)|(\.inf)$/ is written as
#               a text file and any other name as an infodb.
sub save_info {
    my ($self, $filename) = @_;

    if (!defined $filename) {
        $self->_save_info_db();
    }
    elsif ($filename =~ m/(contents)|(\.inf)$/) {
        $self->_save_info_txt($filename);
    }
    else {
        $self->_save_info_db($filename);
    }
}
600
# Entries are never removed from the oai db (a deleted document keeps its entry,
# with status D). This sub therefore leaves the map alone and only reports an
# error on STDERR.
sub delete_info {
    my ($self, $OID) = @_;

    print STDERR "@@@@ ERROR oaiinfo::delete_info: Not allowed to delete entries in oai DB.\n";
}
611
# Sets the entry for $OID to [$del_status, $timestamp], replacing any existing
# entry or adding a new one. When $timestamp is undefined, the current time is used.
sub set_info {
    my ($self, $OID, $del_status, $timestamp) = @_;

    $timestamp = $self->get_current_time() unless defined $timestamp;

    $self->{'info'}->{$OID} = [$del_status, $timestamp];
}
621
# Adds an entry for $OID only if there isn't a defined entry for it already.
# An existing entry is left untouched and an error is reported on STDERR.
sub add_info {
    my ($self, $OID, $del_status, $timestamp) = @_;

    if (!defined($self->{'info'}->{$OID})) {
        $self->set_info($OID, $del_status, $timestamp);
    }
    else {
        print STDERR "@@@@ ERROR oaiinfo::add_info: id $OID already exists. Not adding.\n";
    }
}
632
# Changes the deletion status of an existing entry; its timestamp is not touched.
# If $OID is not in the map, nothing is added and a message goes to STDERR.
sub set_status_info {
    my ($self, $OID, $del_status) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        print STDERR "oaiinfo::set_status_info: Unable to find OAI document id $OID\n";
    }
    else {
        $entry->[INFO_STATUS_INDEX] = $del_status;
    }
}
645
# Returns the deletion status stored for $OID.
# If $OID is not in the map, prints a message to STDERR and returns undef.
sub get_status_info {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        print STDERR "oaiinfo::get_status_info: Unable to find OAI document id $OID\n";
        return undef;
    }

    return $entry->[INFO_STATUS_INDEX];
}
662
# Changes the timestamp of an existing entry; its status is not touched.
# When $timestamp is undefined, the current time is used.
# If $OID is not in the map, nothing is added and a message goes to STDERR.
sub set_timestamp_info {
    my ($self, $OID, $timestamp) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        print STDERR "oaiinfo::set_timestamp_info: Unable to find OAI document id $OID\n";
    }
    else {
        $timestamp = $self->get_current_time() unless defined $timestamp;
        $entry->[INFO_TIMESTAMP_INDEX] = $timestamp;
    }
}
679
# Returns the timestamp stored for $OID.
# If $OID is not in the map, prints a message to STDERR and returns undef.
sub get_timestamp_info {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    if (!defined $entry) {
        print STDERR "oaiinfo::get_timestamp_info: Unable to find OAI document id $OID\n";
        return undef;
    }

    return $entry->[INFO_TIMESTAMP_INDEX];
}
696
697
# Returns a reference to a list of the form [[OID, deletion_status, timestamp], ...],
# one tuple per entry in the in-memory map, in no particular order.
sub get_OID_list
{
    my $self = shift (@_);

    my @list = ();

    # keys needs an explicit hash dereference: applying it to a hash reference
    # expression is a compile error from perl 5.24 onwards.
    foreach my $OID (keys %{$self->{'info'}}) {
        my $OID_info = $self->{'info'}->{$OID};

        push (@list, [$OID, $OID_info->[INFO_STATUS_INDEX],
                      $OID_info->[INFO_TIMESTAMP_INDEX]]);
    }

    return \@list;
}
714
715
716
# Returns the stored tuple for $OID, an array reference of the form
# [deletion_status, timestamp], or undef when there is no defined entry for $OID.
sub get_info {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    return defined $entry ? $entry : undef;
}
728
729
730
# Returns the number of entries in the in-memory map so far, including those
# with a deleted status.
sub size {
    my $self = shift (@_);

    # keys needs an explicit hash dereference: applying it to a hash reference
    # expression is a compile error from perl 5.24 onwards.
    return (scalar keys %{$self->{'info'}});
}
737
7381;
Note: See TracBrowser for help on using the repository browser.