source: main/trunk/greenstone2/perllib/arcinfo.pm@ 21564

Last change on this file since 21564 was 21564, checked in by mdewsnip, 14 years ago

Changed lots of occurrences of "GDBM" in comments, variable names and function names, where the code isn't GDBM-specific. Part of making the code less GDBM-specific.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.5 KB
RevLine 
[537]1###########################################################################
2#
3# arcinfo.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26
[4]27# This module stores information about the archives. At the moment
28# this information just consists of the file name (relative to the
29# directory the archives information file is in) and its OID.
30
31# This module assumes there is a one to one correspondance between
32# a file in the archives directory and an OID.
33
34package arcinfo;
35
[10157]36use constant ORDER_OID_INDEX => 0;
37use constant ORDER_SORT_INDEX => 1;
38
39use constant INFO_FILE_INDEX => 0;
40use constant INFO_STATUS_INDEX => 1;
41
[15889]42use strict;
43
[21543]44use dbutil;
[15889]45
[21543]46
[10157]47# File format read in: OID <tab> Filename <tab> Optional-Index-Status
48
49# Index status can be:
50# I = Index for the first time
51# R = Reindex
52# D = Delete
53# B = Been indexed
54
[4]55sub new {
56 my ($class) = @_;
[14]57 my $self = {'info'=>{},
[19774]58 'reverse-info'=>{},
[15073]59 'order'=>[],
60 'reverse_sort'=>0};
[4]61
62 return bless $self, $class;
63}
64
[18441]65sub _load_info_txt
66{
[4]67 my $self = shift (@_);
68 my ($filename) = @_;
69
[12328]70 if (defined $filename && -e $filename) {
[4]71 open (INFILE, $filename) ||
72 die "arcinfo::load_info couldn't read $filename\n";
73
74 my ($line, @line);
75 while (defined ($line = <INFILE>)) {
76 $line =~ s/\cM|\cJ//g; # remove end-of-line characters
77 @line = split ("\t", $line); # filename,
78 if (scalar(@line) >= 2) {
[14]79 $self->add_info (@line);
[4]80 }
81 }
82 close (INFILE);
83 }
[18441]84
85
[4]86}
87
[21564]88sub _load_info_db
[18441]89{
[4]90 my $self = shift (@_);
91 my ($filename) = @_;
92
[18441]93 my $infodb_map = {};
94
[21543]95 &dbutil::read_infodb_file("gdbm", $filename, $infodb_map);
[18441]96
97 foreach my $oid ( keys %$infodb_map ) {
98 my $vals = $infodb_map->{$oid};
99 # interested in doc-file and index-status
100
101 my ($doc_file) = ($vals=~/^<doc-file>(.*)$/m);
102 my ($index_status) = ($vals=~/^<index-status>(.*)$/m);
[20747]103 my ($sortmeta) = ($vals=~/^<sort-meta>(.*)$/m);
104 $self->add_info ($oid,$doc_file,$index_status,$sortmeta);
[18441]105 }
106}
107
[19774]108
[18441]109sub load_info {
110 my $self = shift (@_);
111 my ($filename) = @_;
112
113 $self->{'info'} = {};
114
115 if ((defined $filename) && (-e $filename)) {
116 if ($filename =~ m/\.inf$/) {
117 $self->_load_info_txt($filename);
118 }
119 else {
[21564]120 $self->_load_info_db($filename);
[18441]121 }
122 }
123}
124
[21564]125sub _load_filelist_db
[18441]126{
127 my $self = shift (@_);
128 my ($filename) = @_;
129
130 my $infodb_map = {};
131
[21543]132 &dbutil::read_infodb_keys("gdbm", $filename, $infodb_map);
[18441]133
134 foreach my $file ( keys %$infodb_map ) {
[18456]135 $self->{'prev_import_filelist'}->{$file} = 1;
[18441]136 }
137}
138
139
[18456]140sub load_prev_import_filelist {
[18441]141 my $self = shift (@_);
142 my ($filename) = @_;
143
144 $self->{'import-filelist'} = {};
145
146 if ((defined $filename) && (-e $filename)) {
147 if ($filename =~ m/\.inf$/) {
148 # e.g. 'archives-src.inf' (which includes complete list of file
149 # from last time import.pl was run)
150 $self->_load_info_txt($filename);
151 }
152 else {
[21564]153 $self->_load_filelist_db($filename);
[18441]154 }
155 }
156}
157
[19774]158sub load_revinfo_UNTESTED
159{
160 my $self = shift (@_);
161 my ($rev_filename) = @_;
162
163 my $rev_infodb_map = {};
164
[21543]165 &dbutil::read_infodb_file("gdbm", $rev_filename, $rev_infodb_map);
[19774]166
167 foreach my $srcfile ( keys %$rev_infodb_map ) {
168 my $vals = $rev_infodb_map->{$srcfile};
169
170 foreach my $OID ($vals =~ m/^<oid>(.*)$/gm) {
171 $self->add_revinfo($srcfile,$OID);
172 }
173 }
174}
175
176
[18441]177sub _save_info_txt {
178 my $self = shift (@_);
179 my ($filename) = @_;
180
[14]181 my ($OID, $info);
[4]182
183 open (OUTFILE, ">$filename") ||
184 die "arcinfo::save_info couldn't write $filename\n";
[7904]185
[14]186 foreach $info (@{$self->get_OID_list()}) {
[4]187 if (defined $info) {
[14]188 print OUTFILE join("\t", @$info), "\n";
[4]189 }
190 }
191 close (OUTFILE);
192}
193
[21564]194sub _save_info_db {
[18441]195 my $self = shift (@_);
196 my ($filename) = @_;
197
198 # Not the most efficient operation, but will do for now
199
200 # read it in
201 my $infodb_map = {};
[21543]202 &dbutil::read_infodb_file("gdbm", $filename, $infodb_map);
[18441]203
204 # change index-status values
205 foreach my $info (@{$self->get_OID_list()}) {
206 if (defined $info) {
207 my ($oid,$doc_file,$index_status) = @$info;
208 if (defined $infodb_map->{$oid}) {
209 my $vals_ref = \$infodb_map->{$oid};
210 $$vals_ref =~ s/^<index-status>(.*)$/<index-status>$index_status/m;
211 }
212 else {
213 print STDERR "Warning: $filename does not have key $oid\n";
214 }
215 }
216 }
217
218
219 # write out again
[21543]220 my $infodb_handle = &dbutil::open_infodb_write_handle("gdbm", $filename);
[18441]221 foreach my $oid ( keys %$infodb_map ) {
222 # consider making the following a method in dbutil
223 # e.g. write_infodb_rawentry_gdbm($infodb_handle,$oid,$vals);
224
225 # no need to escape, as $infodb_map->{$oid} hasn't been unescaped
226 print $infodb_handle "[$oid]\n";
227 print $infodb_handle $infodb_map->{$oid};
228 print $infodb_handle '-' x 70, "\n";
229 }
[21543]230 &dbutil::close_infodb_write_handle("gdbm", $infodb_handle);
[18441]231
232}
233
[21564]234sub save_revinfo_db {
[19774]235 my $self = shift (@_);
236 my ($rev_filename) = @_;
237
238 # Output reverse lookup database
239
240 my $rev_infodb_map = $self->{'reverse-info'};
241 my $rev_infodb_handle
[21543]242 = &dbutil::open_infodb_write_handle("gdbm", $rev_filename, "append");
[19774]243
244 foreach my $key ( keys %$rev_infodb_map ) {
245 my $val_hash = $rev_infodb_map->{$key};
[21543]246 &dbutil::write_infodb_entry("gdbm", $rev_infodb_handle, $key, $val_hash);
[19774]247 }
[21543]248 &dbutil::close_infodb_write_handle("gdbm", $rev_infodb_handle);
[19774]249
250}
251
[18441]252sub save_info {
253 my $self = shift (@_);
254 my ($filename) = @_;
255
[20537]256 if ($filename =~ m/(contents)|(\.inf)$/) {
[18441]257 $self->_save_info_txt($filename);
258 }
259 else {
[21564]260 $self->_save_info_db($filename);
[18441]261 }
262}
263
[14]264sub delete_info {
265 my $self = shift (@_);
266 my ($OID) = @_;
267
268 if (defined $self->{'info'}->{$OID}) {
269 delete $self->{'info'}->{$OID};
270
271 my $i = 0;
272 while ($i < scalar (@{$self->{'order'}})) {
[10157]273 if ($self->{'order'}->[$i]->[ORDER_OID_INDEX] eq $OID) {
[14]274 splice (@{$self->{'order'}}, $i, 1);
275 last;
276 }
277
278 $i ++;
279 }
280 }
281}
282
[4]283sub add_info {
284 my $self = shift (@_);
[10157]285 my ($OID, $doc_file, $index_status, $sortmeta) = @_;
[1287]286 $sortmeta = "" unless defined $sortmeta;
[10157]287 $index_status = "I" unless defined $index_status; # I = needs indexing
[4]288
[3416]289 if (! defined($OID)) {
290 # only happens when no files can be processed?
291 return undef;
292 }
[10157]293
[18469]294 if (defined $self->{'info'}->{$OID}) {
295 # test to see if we are in a reindex situation
296
297 my $existing_status_info = $self->get_status_info($OID);
298
299 if ($existing_status_info eq "D") {
300 # yes, we're in a reindexing situation
301 $self->delete_info ($OID);
302
303
304 # force setting to "reindex"
305 $index_status = "R";
306
307 }
308 else {
309 # some other, possibly erroneous, situation has arisen
310 # where the document already seems to exist
311 print STDERR "Warning: $OID already exists with index status $existing_status_info\n";
312 print STDERR " Deleting previous version\n";
313
314 $self->delete_info ($OID);
315 }
316 }
317
[20747]318 $self->{'info'}->{$OID} = [$doc_file,$index_status,$sortmeta];
[1287]319 push (@{$self->{'order'}}, [$OID, $sortmeta]);
[18469]320
321
[4]322}
323
[10157]324sub set_status_info {
[4]325 my $self = shift (@_);
[10157]326 my ($OID, $index_status) = @_;
[4]327
[10157]328 my $OID_info = $self->{'info'}->{$OID};
329 $OID_info->[INFO_STATUS_INDEX] = $index_status;
330}
331
332
333sub get_status_info {
334 my $self = shift (@_);
335 my ($OID) = @_;
336
337 my $index_status = undef;
338
339 my $OID_info = $self->{'info'}->{$OID};
340 if (defined $OID_info) {
341 $index_status = $OID_info->[INFO_STATUS_INDEX];
342 }
343 else {
344 die "Unable to find document id $OID\n";
345 }
346
347 return $index_status;
348
349}
350
[19774]351
352sub add_reverseinfo {
353 my $self = shift (@_);
354 my ($key, $OID) = @_;
355
356 my $existing_key = $self->{'reverse-info'}->{$key};
357 if (!defined $existing_key) {
358 $existing_key = {};
359 $self->{'reverse-info'}->{$key} = $existing_key;
360 }
361
362 my $existing_oid = $existing_key->{'oid'};
363 if (!defined $existing_oid) {
364 $existing_oid = [];
365 $existing_key->{'oid'} = $existing_oid;
366 }
367
368 push(@$existing_oid,$OID);
369}
370
[20802]371sub set_meta_file_flag {
372 my $self = shift (@_);
373 my ($key) = @_;
[19774]374
[20802]375 my $existing_key = $self->{'reverse-info'}->{$key};
376 if (!defined $existing_key) {
377 $existing_key = {};
378 $self->{'reverse-info'}->{$key} = $existing_key;
379 }
380
381 $existing_key->{'meta-file'} = ["1"];
382
383}
[15073]384sub reverse_sort
385{
386 my $self = shift(@_);
387 $self->{'reverse_sort'} = 1;
388}
[10157]389
390# returns a list of the form [[OID, doc_file, index_status], ...]
391sub get_OID_list
392{
393 my $self = shift (@_);
394
395 my $order = $self->{'order'};
396
[15073]397 my @sorted_order;
398 if ($self->{'reverse_sort'}) {
399 @sorted_order = sort {$b->[ORDER_SORT_INDEX] cmp $a->[ORDER_SORT_INDEX]} @$order;
400 } else {
401 @sorted_order = sort {$a->[ORDER_SORT_INDEX] cmp $b->[ORDER_SORT_INDEX]} @$order;
402 }
[10157]403
[4]404 my @list = ();
405
[10157]406 foreach my $OID_order (@sorted_order) {
407 my $OID = $OID_order->[ORDER_OID_INDEX];
408 my $OID_info = $self->{'info'}->{$OID};
409
410 push (@list, [$OID, $OID_info->[INFO_FILE_INDEX],
411 $OID_info->[INFO_STATUS_INDEX]]);
[4]412 }
413
414 return \@list;
415}
416
417# returns a list of the form [[doc_file, OID], ...]
418sub get_file_list {
419 my $self = shift (@_);
420
[10157]421 my $order = $self->{'order'};
422
[15073]423 my @sorted_order;
424 if ($self->{'reverse_sort'}) {
425 @sorted_order = sort {$b->[ORDER_SORT_INDEX] cmp $a->[ORDER_SORT_INDEX]} @$order;
426 } else {
427 @sorted_order = sort {$a->[ORDER_SORT_INDEX] cmp $b->[ORDER_SORT_INDEX]} @$order;
428 }
[10157]429
[4]430 my @list = ();
431
[15889]432 foreach my $OID_order (@sorted_order) {
[10157]433 my $OID = $OID_order->[ORDER_OID_INDEX];
434 my $OID_info = $self->{'info'}->{$OID};
435
436 push (@list, [$OID_info->[INFO_FILE_INDEX], $OID]);
[4]437 }
438
439 return \@list;
440}
441
442
[20747]443# returns a list of the form [doc_file,index_status,$sort_meta]
[4]444sub get_info {
445 my $self = shift (@_);
446 my ($OID) = @_;
447
448 if (defined $self->{'info'}->{$OID}) {
449 return $self->{'info'}->{$OID};
450 }
451
452 return undef;
453}
454
455
[20747]456
[98]457# returns the number of documents so far
458sub size {
459 my $self = shift (@_);
460 return (scalar(@{$self->{'order'}}));
461}
462
[4]4631;
464
Note: See TracBrowser for help on using the repository browser.