source: gsdl/trunk/perllib/plugins/ArchivesInfPlugin.pm@ 20758

Last change on this file since 20758 was 20758, checked in by kjdon, 15 years ago

Moved the reversesort option from import.pl to ArchivesInfPlugin - this is where the list of docs to be processed is generated, and therefore where we need to set reversesort. Better here than storing it in the database, as then we can change the order without having to reimport.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.3 KB
Line 
1###########################################################################
2#
3# ArchivesInfPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
# Plugin which reads through an archives.inf file (or its GDBM equivalent,
# archiveinf-doc.gdb), i.e. the file generated in the archives directory
# when an import is done, processing each file it finds.
30
31package ArchivesInfPlugin;
32
33use util;
34use doc;
35use PrintInfo;
36use plugin;
37use arcinfo;
38use gsprintf;
39use GDBMUtils;
40
41use strict;
42no strict 'refs'; # allow filehandles to be variables and viceversa
43
# This class derives from PrintInfo.
BEGIN {
    @ArchivesInfPlugin::ISA = qw(PrintInfo);
}

# Argument descriptors contributed by this plugin; new() appends them to the
# shared "ArgList".
my $arguments = [
    {
        'name'    => 'reversesort',
        'desc'    => '{ArchivesInfPlugin.reversesort}',
        'type'    => 'flag',
        'reqd'    => 'no',
        'modegli' => '2',
    },
];

# Descriptor for the plugin itself; new() appends it to the shared "OptList".
my $options = {
    'name'     => 'ArchivesInfPlugin',
    'desc'     => '{ArchivesInfPlugin.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
};
61
# Package-local shorthand: forwards every argument, unchanged, to
# gsprintf::gsprintf and hands back whatever that call returns.
sub gsprintf {
    return &gsprintf::gsprintf(@_);
}
66
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed straight through to the PrintInfo constructor
#   $hashArgOptLists - hash ref; this plugin's argument descriptors are
#                      appended to its "ArgList" entry and its option
#                      descriptor to its "OptList" entry
#
# Returns the object built by PrintInfo, re-blessed into $class.
sub new {
    my ($class) = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);

    push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    push(@{ $hashArgOptLists->{"OptList"} }, $options);

    # Use the arrow form rather than "new PrintInfo(...)": indirect object
    # syntax is resolved by parser heuristics, which is fragile in a package
    # that defines its own new().
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
79
80# called once, at the start of processing
# Stores the reporting settings on the plugin object.
#
#   $verbosity  - always stored under 'verbosity'
#   $outhandle  - stored under 'outhandle' only when defined, so an existing
#                 handle is not overwritten by undef
#   $failhandle - always stored under 'failhandle' (even when undef)
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    $self->{'verbosity'} = $verbosity;

    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }

    $self->{'failhandle'} = $failhandle;
}
93
# Finalises the archive information recorded by read().
#
# Does nothing unless $self->{'archive_info'} is defined. Otherwise, for
# every entry in the archive file list:
#   - status "D": the entry is dropped from the in-memory archive info and
#     from the database file, and the directory containing the document
#     (resolved relative to $self->{'base_dir'}) is removed;
#   - status "I" or "R": the status is changed to "B" ("been indexed").
# Finally the archive info is saved back to
# $self->{'archive_info_filename'}.
sub deinit {
    my ($self) = @_;

    my $archive_info = $self->{'archive_info'};
    my $verbosity    = $self->{'verbosity'};
    my $outhandle    = $self->{'outhandle'};

    if (defined $archive_info) {
        # fileparse() is called by its fully qualified name below, but this
        # file never loads File::Basename itself; load it here instead of
        # relying on some other module having done so.
        require File::Basename;

        my $archive_info_filename = $self->{'archive_info_filename'};

        my $file_list = $archive_info->get_file_list();

        foreach my $subfile (@$file_list) {
            my $doc_oid = $subfile->[1];

            my $index_status = $archive_info->get_status_info($doc_oid);

            # An entry without a status matches neither branch below.
            next unless defined $index_status;

            if ($index_status eq "D") {
                # delete
                $archive_info->delete_info($doc_oid);
                &GDBMUtils::gdbmDatabaseRemove($archive_info_filename, $doc_oid);

                my $doc_file = $subfile->[0];
                my $base_dir = $self->{'base_dir'};

                my $doc_filename = &util::filename_cat($base_dir, $doc_file);

                # Only the directory part is needed; the pattern strips a
                # trailing ".ext" suffix from the file name.
                my (undef, $doc_dirname)
                    = File::Basename::fileparse($doc_filename, "\\.[^\\.]+\$");

                print $outhandle "Removing $doc_dirname\n" if ($verbosity > 2);

                &util::rm_r($doc_dirname);
            }
            elsif ($index_status =~ m/^(I|R)$/) {
                # mark as "been indexed"
                $archive_info->set_status_info($doc_oid, "B");
            }
        }

        $archive_info->save_info($archive_info_filename);
    }
}
139
140# called at the beginning of each plugin pass (import has one, buildin has many)
# Remembers the base directory of the current pass under 'base_dir' on the
# plugin object. The other arguments ($pluginfo, $processor, $maxdocs) are
# accepted but not used here.
sub begin {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;

    $self->{'base_dir'} = $base_dir;
}
147
148# called at the end of each plugin pass
# Nothing to do at the end of a pass for this plugin; the object is left
# untouched.
sub end {
    my ($self) = shift @_;
}
153
154
155# return 1 if this class might recurse using $pluginfo
# Always answers 1: this plugin may recurse using $pluginfo (read() hands
# each listed document back to plugin::read).
sub is_recursive {
    my ($self) = @_;

    return 1;
}
161
162
# This plugin gathers no statistics of its own: $stats is accepted and left
# untouched.
sub compile_stats {
    my $self = shift;
    my ($stats) = @_;
}
167
168# We don't do metadata_read
# This plugin does not take part in the metadata-reading pass: whatever it
# is given, it returns undef.
sub metadata_read {
    my (
        $self,          $pluginfo,      $base_dir,      $file,
        $block_hash,    $extrametakeys, $extrametadata, $extrametafile,
        $processor,     $maxdocs,       $gli,
    ) = @_;

    return undef;
}
177
# File blocking. The only file this plugin blocks is "OIDcount": its full
# path (as computed by util::get_full_filenames from $base_dir and $file) is
# flagged in $block_hash->{'file_blocks'} and 1 is returned. For any other
# file nothing is recorded and undef is returned.
sub file_block_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    # anything else is not ours to block
    return undef unless $file eq "OIDcount";

    my ($full_path, $tail_name) = &util::get_full_filenames($base_dir, $file);
    $block_hash->{'file_blocks'}->{$full_path} = 1;

    return 1;
}
194
195
196# return number of files processed, undef if can't process
197# Note that $base_dir might be "" and that $file might
198# include directories
# Processes an archives directory.
#
# Looks for the archive information database "archiveinf-doc.gdb" under
# $base_dir/$file. If it is not there, returns undef so that another plugin
# can try the file. Otherwise the database is loaded (in reverse order when
# the 'reversesort' option is set), remembered on $self for deinit(), and
# each listed document is passed to plugin::read.
#
# Per-document handling depends on its index status:
#   - processor is incremental capable and $self->{'incremental'} is set:
#       "B" -> skipped (already indexed)
#       "D" -> processed with the processor mode suffixed with "delete"
#       "R" -> processed with the processor mode suffixed with "reindex"
#       anything else -> processed in the current mode
#   - otherwise: "D" -> skipped, anything else -> processed.
# The processor mode is restored after each document.
#
# Processing stops once $total_count plus the documents counted here reaches
# $maxdocs (unless $maxdocs is -1).
#
# Returns the sum of the values returned by plugin::read, or undef when this
# is not an archives directory.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    my $count = 0;

    # see if this has an archives information file within it
    # (the GDBM database is looked for here, not the older archives.inf)
    my $doc_db = "archiveinf-doc.gdb";
    my $archive_info_filename = &util::filename_cat($base_dir, $file, $doc_db);

    # wasn't an archives directory, someone else will have to process it
    return undef unless -e $archive_info_filename;

    # found an archives information file
    &gsprintf($outhandle, "ArchivesInfPlugin: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1;

    # read in the archives information file
    # (arrow-form constructor call: the indirect form "new arcinfo ()" is
    # resolved by parser heuristics, which is fragile in a package that
    # defines its own new())
    my $archive_info = arcinfo->new();
    $self->{'archive_info'} = $archive_info;
    $self->{'archive_info_filename'} = $archive_info_filename;
    if ($self->{'reversesort'}) {
        $archive_info->reverse_sort();
    }

    $archive_info->load_info($archive_info_filename);

    my $file_list = $archive_info->get_file_list();

    # process each file
    foreach my $subfile (@$file_list) {

        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);

        my $tmp = &util::filename_cat($file, $subfile->[0]);
        # an entry that resolves back to $file itself would recurse forever
        next if $tmp eq $file;

        my $doc_oid = $subfile->[1];
        my $index_status = $archive_info->get_status_info($doc_oid);

        my $curr_mode = $processor->get_mode();
        my $new_mode = $curr_mode;

        # Start by assuming we want to process the file...
        my $process_file = 1;

        # ...unless the build processor is incremental capable and
        # -incremental was specified, in which case we need to check its
        # index_status flag
        if ($processor->is_incremental_capable() && $self->{'incremental'}) {
            # Check to see if the file needs indexing
            if ($index_status eq "B") {
                # Don't process this file as it has already been indexed
                $process_file = 0;
            }
            elsif ($index_status eq "D") {
                # Needs to be deleted from the index.
                $new_mode = $curr_mode."delete";
                $process_file = 1;
            }
            elsif ($index_status eq "R") {
                # Needs to be reindexed/replaced
                $new_mode = $curr_mode."reindex";
                $process_file = 1;
            }
        }
        # ... or we're being asked to delete it (in which case skip it)
        elsif ($index_status eq "D") {
            # Non-incremental Delete
            # It's already been deleted from the archives directory
            # (done during import.pl)
            # => All we need to do here is not process it
            $process_file = 0;
        }

        if (!$processor->is_incremental_capable() && $self->{'incremental'}) {
            # Nag feature: warn (once per plugin object) and pause
            if (!defined $self->{'incremental-warning'}) {
                print $outhandle "\n";
                print $outhandle "Warning: command-line option '-incremental' used with *non-incremental*\n";
                print $outhandle " processor '", ref $processor, "'. Some conflicts may arise.\n";
                print $outhandle "\n";
                sleep 10;
                $self->{'incremental-warning'} = 1;
            }
        }

        if ($process_file) {
            # note: metadata is not carried on to the next level

            $processor->set_mode($new_mode) if ($new_mode ne $curr_mode);

            $count += &plugin::read($pluginfo, $base_dir, $tmp, $block_hash, {}, $processor, $maxdocs, ($total_count+$count), $gli);

            $processor->set_mode($curr_mode) if ($new_mode ne $curr_mode);
        }
    }

    return $count;
}
307
3081;
Note: See TracBrowser for help on using the repository browser.