source: main/trunk/greenstone2/perllib/plugins/ArchivesInfPlugin.pm@ 23484

Last change on this file since 23484 was 23419, checked in by max, 13 years ago

Setting the values to store as block files is now done through an API call to BasePlugin. This way, anything uniform requirement (such as putting in both C:\... and c:\... entries for Windows) can be done in one place.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.2 KB
Line 
1###########################################################################
2#
3# ArchivesInfPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which reads through an archives.inf (or archiveinf-doc info database equivalent)
27# -- i.e. the file generated in the archives directory when an import is done),
28# processing each file it finds
29
30package ArchivesInfPlugin;
31
32use util;
33use doc;
34use PrintInfo;
35use plugin;
36use arcinfo;
37use gsprintf;
38
39use strict;
40no strict 'refs'; # allow filehandles to be variables and viceversa
41
42
43BEGIN {
44 @ArchivesInfPlugin::ISA = ('PrintInfo');
45}
46
47my $arguments = [
48 { 'name' => "reversesort",
49 'desc' => "{ArchivesInfPlugin.reversesort}",
50 'type' => "flag",
51 'reqd' => "no",
52 'modegli' => "2" },
53
54 ];
55
56my $options = { 'name' => "ArchivesInfPlugin",
57 'desc' => "{ArchivesInfPlugin.desc}",
58 'abstract' => "no",
59 'inherits' => "yes",
60 'args' => $arguments};
61
62sub gsprintf
63{
64 return &gsprintf::gsprintf(@_);
65}
66
67sub new {
68 my ($class) = shift (@_);
69 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
70 push(@$pluginlist, $class);
71
72 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
73 push(@{$hashArgOptLists->{"OptList"}},$options);
74
75 my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists);
76
77 return bless $self, $class;
78}
79
80# called once, at the start of processing
81sub init {
82 my $self = shift (@_);
83 my ($verbosity, $outhandle, $failhandle) = @_;
84
85 # verbosity is passed through from the processor
86 $self->{'verbosity'} = $verbosity;
87
88 # as are the outhandle and failhandle
89 $self->{'outhandle'} = $outhandle if defined $outhandle;
90 $self->{'failhandle'} = $failhandle;
91
92}
93
94sub deinit {
95 my ($self) = @_;
96
97 my $archive_info = $self->{'archive_info'};
98 my $verbosity = $self->{'verbosity'};
99 my $outhandle = $self->{'outhandle'};
100
101 if (defined $archive_info) {
102 # Get the infodbtype value for this collection from the arcinfo object
103 my $infodbtype = $archive_info->{'infodbtype'};
104 my $archive_info_filename = $self->{'archive_info_filename'};
105 my $infodb_file_handle = &dbutil::open_infodb_write_handle($infodbtype, $archive_info_filename, "append");
106
107 my $file_list = $archive_info->get_file_list();
108
109 foreach my $subfile (@$file_list) {
110 my $doc_oid = $subfile->[1];
111
112 my $index_status = $archive_info->get_status_info($doc_oid);
113
114 if ($index_status eq "D") {
115 # delete
116 $archive_info->delete_info($doc_oid);
117 &dbutil::delete_infodb_entry($infodbtype, $infodb_file_handle, $doc_oid);
118
119 my $doc_file = $subfile->[0];
120 my $base_dir =$self->{'base_dir'};
121
122 my $doc_filename = &util::filename_cat($base_dir,$doc_file);
123
124 my ($doc_tailname, $doc_dirname, $suffix)
125 = File::Basename::fileparse($doc_filename, "\\.[^\\.]+\$");
126
127 print $outhandle "Removing $doc_dirname\n" if ($verbosity>2);
128
129 &util::rm_r($doc_dirname);
130
131
132 }
133 elsif ($index_status =~ m/^(I|R)$/) {
134 # mark as "been indexed"
135 $archive_info->set_status_info($doc_oid,"B");
136 }
137 }
138
139 &dbutil::close_infodb_write_handle($infodbtype, $infodb_file_handle);
140 $archive_info->save_info($archive_info_filename);
141 }
142}
143
144# called at the beginning of each plugin pass (import has one, buildin has many)
145sub begin {
146 my $self = shift (@_);
147 my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
148
149 $self->{'base_dir'} = $base_dir;
150}
151
152sub remove_all {
153 my $self = shift (@_);
154 my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
155
156}
157
158sub remove_one {
159 my $self = shift (@_);
160 my ($file, $oids, $archivedir) = @_;
161 return undef; # only called during import at this stage, this will never be processing a file
162
163}
164
165
166# called at the end of each plugin pass
167sub end {
168 my ($self) = shift (@_);
169
170}
171
172
173# return 1 if this class might recurse using $pluginfo
174sub is_recursive {
175 my $self = shift (@_);
176
177 return 1;
178}
179
180
181sub compile_stats {
182 my $self = shift(@_);
183 my ($stats) = @_;
184}
185
186# We don't do metadata_read
187sub metadata_read {
188 my $self = shift (@_);
189 my ($pluginfo, $base_dir, $file, $block_hash,
190 $extrametakeys, $extrametadata, $extrametafile,
191 $processor, $gli, $aux) = @_;
192
193 return undef;
194}
195
196sub file_block_read {
197
198 my $self = shift (@_);
199 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;
200
201 if ($file eq "OIDcount") {
202 my ($filename_full_path, $filename_no_path)
203 = &util::get_full_filenames($base_dir, $file);
204 $self->block_filename($block_hash,$filename_full_path);
205 return 1;
206 }
207
208 # otherwise, we don't do any file blocking
209
210 return undef;
211}
212
213
214# return number of files processed, undef if can't process
215# Note that $base_dir might be "" and that $file might
216# include directories
217sub read {
218 my $self = shift (@_);
219 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs,$total_count, $gli) = @_;
220 my $outhandle = $self->{'outhandle'};
221
222 my $count = 0;
223
224 # This function only makes sense at build-time
225 return if (ref($processor) !~ /buildproc$/);
226
227 # Get the infodbtype value for this collection from the buildproc object
228 my $infodbtype = $processor->{'infodbtype'};
229 $infodbtype = "gdbm" if $infodbtype eq "gdbm-txtgz";
230
231 # see if this has a archives information file within it
232## my $archive_info_filename = &util::filename_cat($base_dir,$file,"archives.inf");
233 my $archive_info_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", &util::filename_cat($base_dir, $file));
234
235 if (-e $archive_info_filename) {
236
237 # found an archives.inf file
238 &gsprintf($outhandle, "ArchivesInfPlugin: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1;
239
240 # read in the archives information file
241 my $archive_info = new arcinfo($infodbtype);
242 $self->{'archive_info'} = $archive_info;
243 $self->{'archive_info_filename'} = $archive_info_filename;
244 if ($self->{'reversesort'}) {
245 $archive_info->reverse_sort();
246 }
247
248 $archive_info->load_info ($archive_info_filename);
249
250 my $file_list = $archive_info->get_file_list();
251
252 # process each file
253 foreach my $subfile (@$file_list) {
254
255 last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);
256
257 my $tmp = &util::filename_cat ($file, $subfile->[0]);
258 next if $tmp eq $file;
259
260 my $doc_oid = $subfile->[1];
261 my $index_status = $archive_info->get_status_info($doc_oid);
262
263 my $curr_mode = $processor->get_mode();
264 my $new_mode = $curr_mode;
265
266 # Start by assuming we want to process the file...
267 my $process_file = 1;
268
269 # ...unless the build processor is incremental capable and -incremental was specified, in which case we need to check its index_status flag
270 if ($processor->is_incremental_capable() && $self->{'incremental'})
271 {
272 # Check to see if the file needs indexing
273 if ($index_status eq "B")
274 {
275 # Don't process this file as it has already been indexed
276 $process_file = 0;
277 }
278 elsif ($index_status eq "D") {
279 # Need to be delete it from the index.
280 $new_mode = $curr_mode."delete";
281 $process_file = 1;
282 }
283 elsif ($index_status eq "R") {
284 # Need to be reindexed/replaced
285 $new_mode = $curr_mode."reindex";
286
287 $process_file = 1;
288 }
289 }
290 # ... or we're being asked to delete it (in which case skip it)
291 elsif ($index_status eq "D") {
292 # Non-incremental Delete
293 # It's already been deleted from the archives directory
294 # (done during import.pl)
295 # => All we need to do here is not process it
296
297 $process_file = 0;
298 }
299
300 if (!$processor->is_incremental_capable() && $self->{'incremental'}) {
301 # Nag feature
302 if (!defined $self->{'incremental-warning'}) {
303 print $outhandle "\n";
304 print $outhandle "Warning: command-line option '-incremental' used with *non-incremental*\n";
305 print $outhandle " processor '", ref $processor, "'. Some conflicts may arise.\n";
306 print $outhandle "\n";
307 sleep 10;
308 $self->{'incremental-warning'} = 1;
309 }
310 }
311
312 if ($process_file) {
313 # note: metadata is not carried on to the next level
314
315 $processor->set_mode($new_mode) if ($new_mode ne $curr_mode);
316
317 $count += &plugin::read ($pluginfo, $base_dir, $tmp, $block_hash, {}, $processor, $maxdocs, ($total_count+$count), $gli);
318
319 $processor->set_mode($curr_mode) if ($new_mode ne $curr_mode);
320 }
321 }
322
323 return $count;
324 }
325
326
327 # wasn't an archives directory, someone else will have to process it
328 return undef;
329}
330
3311;
Note: See TracBrowser for help on using the repository browser.