###########################################################################
#
# arcinfo.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# This module stores information about the archives. At the moment
# this information just consists of the file name (relative to the
# directory the archives information file is in), its OID and an
# index status.

# This module assumes there is a one-to-one correspondence between
# a file in the archives directory and an OID.
package arcinfo;

use strict;
use warnings;

# Positions inside each entry of $self->{'order'}: [OID, sort key].
use constant ORDER_OID_INDEX  => 0;
use constant ORDER_SORT_INDEX => 1;

# Positions inside each value of $self->{'info'}: [doc_file, index_status].
use constant INFO_FILE_INDEX   => 0;
use constant INFO_STATUS_INDEX => 1;

# File format read in (tab-separated fields, one document per line):
#   OID  Filename  Optional-Index-Status
# A fourth field, if present, is passed to add_info as the sort key.
#
# Index status can be:
#  I = Index for the first time
#  R = Reindex
#  D = Delete
#  B = Been indexed

# Creates an empty archive-information object.
#   'info'  maps OID -> [doc_file, index_status]
#   'order' is a list of [OID, sort key] pairs, in insertion order
sub new {
    my ($class) = @_;

    my $self = {
        'info'  => {},
        'order' => [],
    };

    return bless $self, $class;
}

# Replaces the current contents with the records found in $filename.
# Any previously held records are discarded, even when $filename is
# undefined or does not exist (in which case the object ends up empty).
# Lines with fewer than two tab-separated fields are ignored.
# Dies if the file exists but cannot be opened.
sub load_info {
    my $self = shift (@_);
    my ($filename) = @_;

    # Reset both structures together: clearing only 'info' would leave
    # stale (or, on reload, duplicated) entries behind in 'order'.
    $self->{'info'}  = {};
    $self->{'order'} = [];

    if (defined $filename && -e $filename) {
        open (my $in, '<', $filename)
            || die "arcinfo::load_info couldn't read $filename\n";

        while (defined (my $line = <$in>)) {
            $line =~ s/\cM|\cJ//g; # remove end-of-line characters
            my @fields = split (/\t/, $line);

            # fields are: OID, filename, [index status, [sort key]]
            if (scalar (@fields) >= 2) {
                $self->add_info (@fields);
            }
        }

        close ($in);
    }
}

# Writes one "OID<tab>doc_file<tab>index_status" line per document to
# $filename, in the order given by get_OID_list. The sort key is not
# written. Dies if the file cannot be opened or fully written.
sub save_info {
    my $self = shift (@_);
    my ($filename) = @_;

    open (my $out, '>', $filename)
        || die "arcinfo::save_info couldn't write $filename\n";

    foreach my $info (@{$self->get_OID_list()}) {
        if (defined $info) {
            print $out join ("\t", @$info), "\n";
        }
    }

    # Buffered write errors only surface at close time.
    close ($out)
        || die "arcinfo::save_info couldn't write $filename\n";
}

# Removes the record for $OID, if there is one, from both 'info' and
# 'order'. Does nothing when the OID is unknown.
sub delete_info {
    my $self = shift (@_);
    my ($OID) = @_;

    if (defined $self->{'info'}->{$OID}) {
        delete $self->{'info'}->{$OID};

        my $order = $self->{'order'};
        foreach my $i (0 .. $#{$order}) {
            if ($order->[$i]->[ORDER_OID_INDEX] eq $OID) {
                # Leave the loop straight away: the indices are no
                # longer valid once the array has been spliced.
                splice (@$order, $i, 1);
                last;
            }
        }
    }
}

# Adds (or replaces) the record for $OID.
#   $doc_file     - file name of the document
#   $index_status - optional, defaults to "I" (needs indexing)
#   $sortmeta     - optional sort key, defaults to ""
# Returns undef without storing anything when $OID is undefined.
sub add_info {
    my $self = shift (@_);
    my ($OID, $doc_file, $index_status, $sortmeta) = @_;

    if (! defined ($OID)) {
        # only happens when no files can be processed?
        return undef;
    }

    $sortmeta     = ""  unless defined $sortmeta;
    $index_status = "I" unless defined $index_status; # I = needs indexing

    # Drop any existing record first so an OID never appears twice.
    $self->delete_info ($OID);
    $self->{'info'}->{$OID} = [$doc_file, $index_status];
    push (@{$self->{'order'}}, [$OID, $sortmeta]);
}

# Sets the index status of $OID. When $OID has no record, nothing is
# stored: the assignment lands in a temporary array that is thrown away.
sub set_status_info {
    my $self = shift (@_);
    my ($OID, $index_status) = @_;

    my $OID_info = $self->{'info'}->{$OID};
    $OID_info->[INFO_STATUS_INDEX] = $index_status;
}

# Returns the index status of $OID. Dies if $OID has no record.
sub get_status_info {
    my $self = shift (@_);
    my ($OID) = @_;

    my $OID_info = $self->{'info'}->{$OID};
    if (! defined $OID_info) {
        die "Unable to find document id $OID\n";
    }

    return $OID_info->[INFO_STATUS_INDEX];
}

# Private helper: returns the [OID, sort key] pairs of 'order' sorted by
# sort key (string comparison). Must be called in list context.
sub _sorted_order {
    my $self = shift (@_);

    return sort { $a->[ORDER_SORT_INDEX] cmp $b->[ORDER_SORT_INDEX] }
           @{$self->{'order'}};
}

# returns a reference to a list of the form
# [[OID, doc_file, index_status], ...], sorted by sort key
sub get_OID_list {
    my $self = shift (@_);

    my @list = ();
    foreach my $OID_order ($self->_sorted_order ()) {
        my $OID = $OID_order->[ORDER_OID_INDEX];
        my $OID_info = $self->{'info'}->{$OID};
        push (@list, [$OID,
                      $OID_info->[INFO_FILE_INDEX],
                      $OID_info->[INFO_STATUS_INDEX]]);
    }

    return \@list;
}

# returns a reference to a list of the form
# [[doc_file, OID], ...], sorted by sort key
sub get_file_list {
    my $self = shift (@_);

    my @list = ();
    foreach my $OID_order ($self->_sorted_order ()) {
        my $OID = $OID_order->[ORDER_OID_INDEX];
        my $OID_info = $self->{'info'}->{$OID};
        push (@list, [$OID_info->[INFO_FILE_INDEX], $OID]);
    }

    return \@list;
}

# returns the stored record for $OID, a reference to a list of the form
# [doc_file, index_status], or undef if $OID has no record
sub get_info {
    my $self = shift (@_);
    my ($OID) = @_;

    if (defined $self->{'info'}->{$OID}) {
        return $self->{'info'}->{$OID};
    }

    return undef;
}

# returns the number of documents so far
sub size {
    my $self = shift (@_);
    return (scalar (@{$self->{'order'}}));
}

1;