source: main/trunk/greenstone2/perllib/expinfo.pm@ 32578

Last change on this file since 32578 was 20651, checked in by davidb, 15 years ago

Updated to support incremental exporting

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.5 KB
Line 
1###########################################################################
2#
3# expinfo.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# modified by: Chi-Yu Huang
27# This module stores information about the export directory. At the moment
28# this information just consists of the file name (relative to the
29# directory the export information file is in) and its OID.
30
31# This module assumes there is a one to one correspondence between
32# a file in the export directory and an OID.
33
34package expinfo;
35
36
37use strict;
38
39
# Constructor: returns an empty export-info object blessed into $class.
#   'info'  - hash mapping an OID to an array ref [doc_file, index_status]
#   'order' - array of [OID, sortmeta] pairs, in the order they were added
sub new {
    my $class = shift (@_);

    return bless {
        'info'  => {},
        'order' => [],
    }, $class;
}
47
# Loads export information from the file $filename into this object,
# replacing whatever the object held before.
#
# Each line of the file is split on tab characters and the resulting
# fields are handed to add_info() in order (OID first, then the document
# file name, then any further fields). Lines with fewer than two fields
# are skipped. If $filename does not exist the object is simply left
# empty.
#
# Dies if the file exists but cannot be opened for reading.
sub load_info {
    my $self = shift (@_);
    my ($filename) = @_;

    # Reset both structures together: clearing only 'info' would leave
    # stale (or, on a reload, duplicated) entries behind in 'order'.
    $self->{'info'} = {};
    $self->{'order'} = [];

    if (-e $filename) {
        # three-argument open with a lexical handle, so the file name can
        # never be interpreted as part of the open mode
        open (my $infile, '<', $filename)
            or die "expinfo::load_info couldn't read $filename\n";

        while (defined (my $line = <$infile>)) {
            $line =~ s/\cM|\cJ//g; # remove end-of-line characters
            my @fields = split (/\t/, $line); # OID, doc file, ...
            if (scalar(@fields) >= 2) {
                $self->add_info (@fields);
            }
        }
        close ($infile);
    }
}
69
# Writes the export information to the file $filename, overwriting any
# existing content.
#
# One line is written per entry returned by get_OID_list(), with the
# entry's fields joined by tab characters and terminated by a newline.
#
# Dies if the file cannot be opened for writing, or if closing it fails
# (which is where buffered write errors are reported).
sub save_info {
    my $self = shift (@_);
    my ($filename) = @_;

    # three-argument open with a lexical handle: the file name is never
    # parsed for mode characters and the handle is not a package global
    open (my $outfile, '>', $filename)
        or die "expinfo::save_info couldn't write $filename\n";

    foreach my $info (@{$self->get_OID_list()}) {
        if (defined $info) {
            print $outfile join("\t", @$info), "\n";
        }
    }

    close ($outfile)
        or die "expinfo::save_info couldn't write $filename\n";
}
86
# Forgets everything stored about $OID: its record is removed from the
# 'info' table and its first matching entry is removed from the 'order'
# list. An OID with no record is ignored.
sub delete_info {
    my ($self, $OID) = @_;

    # nothing recorded under this OID, so there is nothing to remove
    return unless defined $self->{'info'}->{$OID};

    delete $self->{'info'}->{$OID};

    # drop the first ordering entry that carries this OID
    my $order = $self->{'order'};
    foreach my $pos (0 .. $#{$order}) {
        next unless $order->[$pos]->[0] eq $OID;
        splice (@{$order}, $pos, 1);
        last;
    }
}
104
# Records a document in the export information.
#
# Parameters:
#   $OID          - identifier of the document; if undefined nothing is
#                   stored and undef is returned
#   $doc_file     - file name stored for the document
#   $index_status - status flag stored alongside the file name
#   $sortmeta     - key used to order the document lists; defaults to ""
#
# If the OID is already present, the previous entry is deleted first.
# When that previous entry had status "D" the new entry is stored with
# status "R" (reindex) regardless of the $index_status passed in;
# otherwise a warning is printed to STDERR.
# NOTE(review): "D" presumably means "deleted" -- confirm against the
# code that sets it.
sub add_info {
    my $self = shift (@_);
    my ($OID, $doc_file, $index_status, $sortmeta) = @_;
    $sortmeta = "" unless defined $sortmeta;

    if (! defined($OID)) {
        # only happens when no files can be processed?
        return undef;
    }

    print STDERR "**** adding info $OID\n";

    if (defined $self->{'info'}->{$OID}) {
        # test to see if we are in a reindex situation

        # Read the stored status straight from the record. This package
        # defines no get_status_info() method, so calling one here would
        # die as soon as an OID was added a second time.
        my $existing_status_info = $self->{'info'}->{$OID}->[1];
        # records added without a status have none
        $existing_status_info = "" unless defined $existing_status_info;

        if ($existing_status_info eq "D") {
            # yes, we're in a reindexing situation
            $self->delete_info ($OID);

            # force setting to "reindex"
            $index_status = "R";
        }
        else {
            # some other, possibly erroneous, situation has arisen
            # where the document already seems to exist
            print STDERR "Warning: $OID already exists with index status $existing_status_info\n";
            print STDERR " Deleting previous version\n";

            $self->delete_info ($OID);
        }
    }

    $self->{'info'}->{$OID} = [$doc_file,$index_status];
    push (@{$self->{'order'}}, [$OID, $sortmeta]);
}
144
# Returns a reference to a list of the form [[OID, doc_file], ...].
# Entries appear in string order of the sort metadata they were added with.
sub get_OID_list {
    my $self = shift (@_);

    my @sorted = sort { $a->[1] cmp $b->[1] } @{$self->{'order'}};

    # pair each OID with the file name held in its 'info' record
    my @list = map { [ $_->[0], $self->{'info'}->{$_->[0]}->[0] ] } @sorted;

    return \@list;
}
157
# Returns a reference to a list of the form [[doc_file, OID], ...].
# Entries appear in string order of the sort metadata they were added with.
sub get_file_list {
    my $self = shift (@_);

    my @sorted = sort { $a->[1] cmp $b->[1] } @{$self->{'order'}};

    # pair the file name held in each 'info' record with its OID
    my @list = map { [ $self->{'info'}->{$_->[0]}->[0], $_->[0] ] } @sorted;

    return \@list;
}
170
171
# Returns the record stored for $OID -- an array reference of the form
# [doc_file, index_status] -- or undef if nothing is stored for that OID.
sub get_info {
    my ($self, $OID) = @_;

    my $record = $self->{'info'}->{$OID};
    return undef unless defined $record;

    return $record;
}
183
184
# Returns how many documents have been recorded so far, i.e. the number
# of entries in the 'order' list.
sub size {
    my ($self) = @_;

    my $count = scalar (@{$self->{'order'}});
    return ($count);
}
190
1911;
192
Note: See TracBrowser for help on using the repository browser.