source: main/trunk/greenstone2/perllib/arcinfo.pm@ 21579

Last change on this file since 21579 was 21579, checked in by mdewsnip, 14 years ago

Changed arcinfo.pm to take the infodb type to use for the archiveinf databases as a parameter. If this is not defined, the default infodb type from dbutil will be used (this is likely to always be gdbm). Part of making the code less GDBM-specific.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.9 KB
Line 
1###########################################################################
2#
3# arcinfo.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26
27# This module stores information about the archives. At the moment
28# this information just consists of the file name (relative to the
29# directory the archives information file is in) and its OID.
30
31# This module assumes there is a one to one correspondance between
32# a file in the archives directory and an OID.
33
34package arcinfo;
35
36use constant ORDER_OID_INDEX => 0;
37use constant ORDER_SORT_INDEX => 1;
38
39use constant INFO_FILE_INDEX => 0;
40use constant INFO_STATUS_INDEX => 1;
41
42use strict;
43
44use dbutil;
45
46
47# File format read in: OID <tab> Filename <tab> Optional-Index-Status
48
49# Index status can be:
50# I = Index for the first time
51# R = Reindex
52# D = Delete
53# B = Been indexed
54
55sub new {
56 my $class = shift(@_);
57 my $infodbtype = shift(@_);
58
59 # If the infodbtype wasn't passed in, use the default from dbutil
60 if (!defined($infodbtype))
61 {
62 $infodbtype = &dbutil::get_default_infodb_type();
63 }
64
65 my $self = {'infodbtype' => $infodbtype,
66 'info'=>{},
67 'reverse-info'=>{},
68 'order'=>[],
69 'reverse_sort'=>0};
70
71 return bless $self, $class;
72}
73
74sub _load_info_txt
75{
76 my $self = shift (@_);
77 my ($filename) = @_;
78
79 if (defined $filename && -e $filename) {
80 open (INFILE, $filename) ||
81 die "arcinfo::load_info couldn't read $filename\n";
82
83 my ($line, @line);
84 while (defined ($line = <INFILE>)) {
85 $line =~ s/\cM|\cJ//g; # remove end-of-line characters
86 @line = split ("\t", $line); # filename,
87 if (scalar(@line) >= 2) {
88 $self->add_info (@line);
89 }
90 }
91 close (INFILE);
92 }
93
94
95}
96
97sub _load_info_db
98{
99 my $self = shift (@_);
100 my ($filename) = @_;
101
102 my $infodb_map = {};
103
104 &dbutil::read_infodb_file($self->{'infodbtype'}, $filename, $infodb_map);
105
106 foreach my $oid ( keys %$infodb_map ) {
107 my $vals = $infodb_map->{$oid};
108 # interested in doc-file and index-status
109
110 my ($doc_file) = ($vals=~/^<doc-file>(.*)$/m);
111 my ($index_status) = ($vals=~/^<index-status>(.*)$/m);
112 my ($sortmeta) = ($vals=~/^<sort-meta>(.*)$/m);
113 $self->add_info ($oid,$doc_file,$index_status,$sortmeta);
114 }
115}
116
117
118sub load_info {
119 my $self = shift (@_);
120 my ($filename) = @_;
121
122 $self->{'info'} = {};
123
124 if ((defined $filename) && (-e $filename)) {
125 if ($filename =~ m/\.inf$/) {
126 $self->_load_info_txt($filename);
127 }
128 else {
129 $self->_load_info_db($filename);
130 }
131 }
132}
133
134sub _load_filelist_db
135{
136 my $self = shift (@_);
137 my ($filename) = @_;
138
139 my $infodb_map = {};
140
141 &dbutil::read_infodb_keys($self->{'infodbtype'}, $filename, $infodb_map);
142
143 foreach my $file ( keys %$infodb_map ) {
144 $self->{'prev_import_filelist'}->{$file} = 1;
145 }
146}
147
148
149sub load_prev_import_filelist {
150 my $self = shift (@_);
151 my ($filename) = @_;
152
153 $self->{'import-filelist'} = {};
154
155 if ((defined $filename) && (-e $filename)) {
156 if ($filename =~ m/\.inf$/) {
157 # e.g. 'archives-src.inf' (which includes complete list of file
158 # from last time import.pl was run)
159 $self->_load_info_txt($filename);
160 }
161 else {
162 $self->_load_filelist_db($filename);
163 }
164 }
165}
166
167sub load_revinfo_UNTESTED
168{
169 my $self = shift (@_);
170 my ($rev_filename) = @_;
171
172 my $rev_infodb_map = {};
173
174 &dbutil::read_infodb_file($self->{'infodbtype'}, $rev_filename, $rev_infodb_map);
175
176 foreach my $srcfile ( keys %$rev_infodb_map ) {
177 my $vals = $rev_infodb_map->{$srcfile};
178
179 foreach my $OID ($vals =~ m/^<oid>(.*)$/gm) {
180 $self->add_revinfo($srcfile,$OID);
181 }
182 }
183}
184
185
186sub _save_info_txt {
187 my $self = shift (@_);
188 my ($filename) = @_;
189
190 my ($OID, $info);
191
192 open (OUTFILE, ">$filename") ||
193 die "arcinfo::save_info couldn't write $filename\n";
194
195 foreach $info (@{$self->get_OID_list()}) {
196 if (defined $info) {
197 print OUTFILE join("\t", @$info), "\n";
198 }
199 }
200 close (OUTFILE);
201}
202
203sub _save_info_db {
204 my $self = shift (@_);
205 my ($filename) = @_;
206
207 # Not the most efficient operation, but will do for now
208
209 # read it in
210 my $infodb_map = {};
211 &dbutil::read_infodb_file($self->{'infodbtype'}, $filename, $infodb_map);
212
213 # change index-status values
214 foreach my $info (@{$self->get_OID_list()}) {
215 if (defined $info) {
216 my ($oid,$doc_file,$index_status) = @$info;
217 if (defined $infodb_map->{$oid}) {
218 my $vals_ref = \$infodb_map->{$oid};
219 $$vals_ref =~ s/^<index-status>(.*)$/<index-status>$index_status/m;
220 }
221 else {
222 print STDERR "Warning: $filename does not have key $oid\n";
223 }
224 }
225 }
226
227
228 # write out again
229 my $infodb_handle = &dbutil::open_infodb_write_handle($self->{'infodbtype'}, $filename);
230 foreach my $oid ( keys %$infodb_map ) {
231 # consider making the following a method in dbutil
232 # e.g. write_infodb_rawentry($infodb_handle,$oid,$vals);
233
234 # no need to escape, as $infodb_map->{$oid} hasn't been unescaped
235 # GDBM SPECIFIC!
236 print $infodb_handle "[$oid]\n";
237 print $infodb_handle $infodb_map->{$oid};
238 print $infodb_handle '-' x 70, "\n";
239 }
240 &dbutil::close_infodb_write_handle($self->{'infodbtype'}, $infodb_handle);
241
242}
243
244sub save_revinfo_db {
245 my $self = shift (@_);
246 my ($rev_filename) = @_;
247
248 # Output reverse lookup database
249
250 my $rev_infodb_map = $self->{'reverse-info'};
251 my $rev_infodb_handle
252 = &dbutil::open_infodb_write_handle($self->{'infodbtype'}, $rev_filename, "append");
253
254 foreach my $key ( keys %$rev_infodb_map ) {
255 my $val_hash = $rev_infodb_map->{$key};
256 &dbutil::write_infodb_entry($self->{'infodbtype'}, $rev_infodb_handle, $key, $val_hash);
257 }
258 &dbutil::close_infodb_write_handle($self->{'infodbtype'}, $rev_infodb_handle);
259
260}
261
262sub save_info {
263 my $self = shift (@_);
264 my ($filename) = @_;
265
266 if ($filename =~ m/(contents)|(\.inf)$/) {
267 $self->_save_info_txt($filename);
268 }
269 else {
270 $self->_save_info_db($filename);
271 }
272}
273
274sub delete_info {
275 my $self = shift (@_);
276 my ($OID) = @_;
277
278 if (defined $self->{'info'}->{$OID}) {
279 delete $self->{'info'}->{$OID};
280
281 my $i = 0;
282 while ($i < scalar (@{$self->{'order'}})) {
283 if ($self->{'order'}->[$i]->[ORDER_OID_INDEX] eq $OID) {
284 splice (@{$self->{'order'}}, $i, 1);
285 last;
286 }
287
288 $i ++;
289 }
290 }
291}
292
293sub add_info {
294 my $self = shift (@_);
295 my ($OID, $doc_file, $index_status, $sortmeta) = @_;
296 $sortmeta = "" unless defined $sortmeta;
297 $index_status = "I" unless defined $index_status; # I = needs indexing
298
299 if (! defined($OID)) {
300 # only happens when no files can be processed?
301 return undef;
302 }
303
304 if (defined $self->{'info'}->{$OID}) {
305 # test to see if we are in a reindex situation
306
307 my $existing_status_info = $self->get_status_info($OID);
308
309 if ($existing_status_info eq "D") {
310 # yes, we're in a reindexing situation
311 $self->delete_info ($OID);
312
313
314 # force setting to "reindex"
315 $index_status = "R";
316
317 }
318 else {
319 # some other, possibly erroneous, situation has arisen
320 # where the document already seems to exist
321 print STDERR "Warning: $OID already exists with index status $existing_status_info\n";
322 print STDERR " Deleting previous version\n";
323
324 $self->delete_info ($OID);
325 }
326 }
327
328 $self->{'info'}->{$OID} = [$doc_file,$index_status,$sortmeta];
329 push (@{$self->{'order'}}, [$OID, $sortmeta]);
330
331
332}
333
334sub set_status_info {
335 my $self = shift (@_);
336 my ($OID, $index_status) = @_;
337
338 my $OID_info = $self->{'info'}->{$OID};
339 $OID_info->[INFO_STATUS_INDEX] = $index_status;
340}
341
342
343sub get_status_info {
344 my $self = shift (@_);
345 my ($OID) = @_;
346
347 my $index_status = undef;
348
349 my $OID_info = $self->{'info'}->{$OID};
350 if (defined $OID_info) {
351 $index_status = $OID_info->[INFO_STATUS_INDEX];
352 }
353 else {
354 die "Unable to find document id $OID\n";
355 }
356
357 return $index_status;
358
359}
360
361
362sub add_reverseinfo {
363 my $self = shift (@_);
364 my ($key, $OID) = @_;
365
366 my $existing_key = $self->{'reverse-info'}->{$key};
367 if (!defined $existing_key) {
368 $existing_key = {};
369 $self->{'reverse-info'}->{$key} = $existing_key;
370 }
371
372 my $existing_oid = $existing_key->{'oid'};
373 if (!defined $existing_oid) {
374 $existing_oid = [];
375 $existing_key->{'oid'} = $existing_oid;
376 }
377
378 push(@$existing_oid,$OID);
379}
380
381sub set_meta_file_flag {
382 my $self = shift (@_);
383 my ($key) = @_;
384
385 my $existing_key = $self->{'reverse-info'}->{$key};
386 if (!defined $existing_key) {
387 $existing_key = {};
388 $self->{'reverse-info'}->{$key} = $existing_key;
389 }
390
391 $existing_key->{'meta-file'} = ["1"];
392
393}
394sub reverse_sort
395{
396 my $self = shift(@_);
397 $self->{'reverse_sort'} = 1;
398}
399
400# returns a list of the form [[OID, doc_file, index_status], ...]
401sub get_OID_list
402{
403 my $self = shift (@_);
404
405 my $order = $self->{'order'};
406
407 my @sorted_order;
408 if ($self->{'reverse_sort'}) {
409 @sorted_order = sort {$b->[ORDER_SORT_INDEX] cmp $a->[ORDER_SORT_INDEX]} @$order;
410 } else {
411 @sorted_order = sort {$a->[ORDER_SORT_INDEX] cmp $b->[ORDER_SORT_INDEX]} @$order;
412 }
413
414 my @list = ();
415
416 foreach my $OID_order (@sorted_order) {
417 my $OID = $OID_order->[ORDER_OID_INDEX];
418 my $OID_info = $self->{'info'}->{$OID};
419
420 push (@list, [$OID, $OID_info->[INFO_FILE_INDEX],
421 $OID_info->[INFO_STATUS_INDEX]]);
422 }
423
424 return \@list;
425}
426
427# returns a list of the form [[doc_file, OID], ...]
428sub get_file_list {
429 my $self = shift (@_);
430
431 my $order = $self->{'order'};
432
433 my @sorted_order;
434 if ($self->{'reverse_sort'}) {
435 @sorted_order = sort {$b->[ORDER_SORT_INDEX] cmp $a->[ORDER_SORT_INDEX]} @$order;
436 } else {
437 @sorted_order = sort {$a->[ORDER_SORT_INDEX] cmp $b->[ORDER_SORT_INDEX]} @$order;
438 }
439
440 my @list = ();
441
442 foreach my $OID_order (@sorted_order) {
443 my $OID = $OID_order->[ORDER_OID_INDEX];
444 my $OID_info = $self->{'info'}->{$OID};
445
446 push (@list, [$OID_info->[INFO_FILE_INDEX], $OID]);
447 }
448
449 return \@list;
450}
451
452
453# returns a list of the form [doc_file,index_status,$sort_meta]
454sub get_info {
455 my $self = shift (@_);
456 my ($OID) = @_;
457
458 if (defined $self->{'info'}->{$OID}) {
459 return $self->{'info'}->{$OID};
460 }
461
462 return undef;
463}
464
465
466
467# returns the number of documents so far
468sub size {
469 my $self = shift (@_);
470 return (scalar(@{$self->{'order'}}));
471}
472
4731;
474
Note: See TracBrowser for help on using the repository browser.