source: gsdl/trunk/perllib/arcinfo.pm@ 15073

Last change on this file since 15073 was 15073, checked in by kjdon, 16 years ago

added -reversesort option to import.pl. Used with -sortmeta option, to sort in reverse order

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.5 KB
Line 
1###########################################################################
2#
3# arcinfo.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26
27# This module stores information about the archives. At the moment
28# this information just consists of the file name (relative to the
29# directory the archives information file is in) and its OID.
30
31# This module assumes there is a one-to-one correspondence between
32# a file in the archives directory and an OID.
33
34
35package arcinfo;
36
37use constant ORDER_OID_INDEX => 0;
38use constant ORDER_SORT_INDEX => 1;
39
40use constant INFO_FILE_INDEX => 0;
41use constant INFO_STATUS_INDEX => 1;
42
43# File format read in: OID <tab> Filename <tab> Optional-Index-Status
44
45# Index status can be:
46# I = Index for the first time
47# R = Reindex
48# D = Delete
49# B = Been indexed
50
# Constructor: builds an empty archive-info object with three fields.
#   info         - hash mapping OID => [doc_file, index_status]
#   order        - array of [OID, sortmeta] pairs
#   reverse_sort - flag read by the list accessors; starts off as 0
sub new {
    my $class = shift;

    my %fields = (
        'info'         => {},
        'order'        => [],
        'reverse_sort' => 0,
    );

    return bless \%fields, $class;
}
59
# Loads archive information from $filename into this object, replacing
# whatever the object held before.
#
# Each line of the file is expected to look like
#   OID <tab> Filename <tab> Optional-Index-Status
# End-of-line characters are stripped, the line is split on tabs and all
# resulting fields are handed to add_info().  Lines with fewer than two
# fields are ignored.  If $filename is undefined or does not exist the
# object is simply left empty.
#
# Dies if the file exists but cannot be opened.
sub load_info {
    my $self = shift (@_);
    my ($filename) = @_;

    # Reset both structures together.  'order' must never mention OIDs that
    # are missing from 'info': add_info() only removes an old order entry
    # when the OID is still present in 'info', so a stale 'order' would end
    # up with leftover or duplicated entries after a reload.
    $self->{'info'} = {};
    $self->{'order'} = [];

    if (defined $filename && -e $filename) {
        # Three-argument open with a lexical handle: the filename is taken
        # literally (no mode or pipe characters are interpreted) and no
        # global bareword handle is shared with other code.
        open (my $infile, '<', $filename) ||
            die "arcinfo::load_info couldn't read $filename\n";

        while (defined (my $line = <$infile>)) {
            $line =~ s/\cM|\cJ//g; # remove end-of-line characters
            my @fields = split ("\t", $line); # OID, filename, [index status]
            if (scalar(@fields) >= 2) {
                $self->add_info (@fields);
            }
        }
        close ($infile);
    }
}
81
# Writes the archive information to $filename, one document per line, as
# the tab-separated fields of each entry returned by get_OID_list()
# (OID, doc_file, index_status), in the order get_OID_list() returns them.
#
# Dies if the file cannot be opened for writing, or if closing it fails
# (buffered write errors only show up when the handle is closed).
sub save_info {
    my $self = shift (@_);
    my ($filename) = @_;

    # Three-argument open with a lexical handle: the filename is taken
    # literally instead of being spliced into a ">..." mode string.
    open (my $outfile, '>', $filename) ||
        die "arcinfo::save_info couldn't write $filename\n";

    foreach my $info (@{$self->get_OID_list()}) {
        if (defined $info) {
            print $outfile join("\t", @$info), "\n";
        }
    }

    close ($outfile) ||
        die "arcinfo::save_info couldn't finish writing $filename: $!\n";
}
98
# Forgets everything stored for $OID: its record in 'info' and the first
# matching [OID, sortmeta] pair in 'order'.  Does nothing when the OID has
# no defined record.
sub delete_info {
    my ($self, $OID) = @_;

    # nothing to do for an OID without a record
    return unless defined $self->{'info'}->{$OID};

    delete $self->{'info'}->{$OID};

    # drop the first order entry carrying this OID, then stop looking
    my $order = $self->{'order'};
    foreach my $pos (0 .. $#{$order}) {
        next unless $order->[$pos]->[ORDER_OID_INDEX] eq $OID;
        splice (@$order, $pos, 1);
        last;
    }
}
117
# Records a document, replacing any earlier record for the same OID.
#   $OID          - document identifier; when undefined nothing is stored
#                   and undef is returned
#   $doc_file     - file name stored alongside the OID
#   $index_status - index status letter; defaults to "I" (needs indexing)
#   $sortmeta     - string used to order the lists; defaults to ""
# The new [OID, sortmeta] pair is appended to the end of 'order'.
sub add_info {
    my ($self, $OID, $doc_file, $index_status, $sortmeta) = @_;

    # only happens when no files can be processed?
    return undef if (!defined $OID);

    if (!defined $index_status) {
        $index_status = "I"; # I = needs indexing
    }
    if (!defined $sortmeta) {
        $sortmeta = "";
    }

    # make sure the OID appears only once before (re)adding it
    $self->delete_info ($OID);

    $self->{'info'}->{$OID} = [$doc_file, $index_status];
    push (@{$self->{'order'}}, [$OID, $sortmeta]);
}
133
# Sets the index status of the document $OID to $index_status.
# The stored record is reached through a local copy of its reference, so
# an OID that has no record is not added to 'info' by this call.
sub set_status_info {
    my ($self, $OID, $index_status) = @_;

    my $record = $self->{'info'}->{$OID};
    $record->[INFO_STATUS_INDEX] = $index_status;
}
141
142
# Returns the index status stored for the document $OID.
# Dies with "Unable to find document id ..." when the OID has no record.
sub get_status_info {
    my ($self, $OID) = @_;

    my $record = $self->{'info'}->{$OID};
    die "Unable to find document id $OID\n" unless defined $record;

    return $record->[INFO_STATUS_INDEX];
}
160
# Switches the object to reverse ordering: sets the 'reverse_sort' flag,
# which get_OID_list() and get_file_list() consult when sorting.
sub reverse_sort
{
    my ($self) = @_;

    $self->{'reverse_sort'} = 1;
}
166
# Returns a reference to a list of the form
#   [[OID, doc_file, index_status], ...]
# The entries are ordered by comparing their sortmeta strings with cmp;
# the comparison is reversed when the 'reverse_sort' flag is set.
sub get_OID_list
{
    my ($self) = @_;

    # +1 keeps the ascending comparison, -1 turns it around
    my $direction = $self->{'reverse_sort'} ? -1 : 1;

    my @sorted_order =
        sort { $direction * ($a->[ORDER_SORT_INDEX] cmp $b->[ORDER_SORT_INDEX]) }
        @{$self->{'order'}};

    # look every OID up in 'info' and flatten it into one triple
    my @list = map {
        my $oid = $_->[ORDER_OID_INDEX];
        my $record = $self->{'info'}->{$oid};
        [$oid, $record->[INFO_FILE_INDEX], $record->[INFO_STATUS_INDEX]];
    } @sorted_order;

    return \@list;
}
193
# Returns a reference to a list of the form [[doc_file, OID], ...]
# The entries are ordered by comparing their sortmeta strings with cmp;
# the comparison is reversed when the 'reverse_sort' flag is set.
sub get_file_list {
    my $self = shift (@_);

    my $order = $self->{'order'};

    my @sorted_order;
    if ($self->{'reverse_sort'}) {
        @sorted_order = sort {$b->[ORDER_SORT_INDEX] cmp $a->[ORDER_SORT_INDEX]} @$order;
    } else {
        @sorted_order = sort {$a->[ORDER_SORT_INDEX] cmp $b->[ORDER_SORT_INDEX]} @$order;
    }

    my @list = ();

    # The loop variable is declared with "my" so that it is a lexical
    # rather than an implicit package global; this matches get_OID_list
    # and keeps the sub valid under "use strict".
    foreach my $OID_order (@sorted_order) {
        my $OID = $OID_order->[ORDER_OID_INDEX];
        my $OID_info = $self->{'info'}->{$OID};

        push (@list, [$OID_info->[INFO_FILE_INDEX], $OID]);
    }

    return \@list;
}
218
219
# Returns the record stored for $OID -- a reference to the array
# [doc_file, index_status] held in 'info' (the stored array itself, not a
# copy) -- or undef when the OID has no defined record.
sub get_info {
    my ($self, $OID) = @_;

    my $record = $self->{'info'}->{$OID};

    return defined $record ? $record : undef;
}
231
232
# Returns the number of documents recorded so far, i.e. the number of
# entries currently held in 'order'.
sub size {
    my ($self) = @_;

    my $count = @{$self->{'order'}};
    return $count;
}
238
2391;
240
Note: See TracBrowser for help on using the repository browser.