source: main/trunk/greenstone2/perllib/plugins/ArchivesInfPlugin.pm@ 20999

Last change on this file since 20999 was 20760, checked in by kjdon, 15 years ago

need to add args to options to get the args to show up in pluginfo.pl

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.3 KB
Line 
1###########################################################################
2#
3# ArchivesInfPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
# Plugin which reads through an archives.inf file (or its GDBM equivalent,
# archiveinf-doc.gdb), i.e. the file generated in the
# archives directory when an import is done, processing each file it
# finds.
30
31package ArchivesInfPlugin;
32
33use util;
34use doc;
35use PrintInfo;
36use plugin;
37use arcinfo;
38use gsprintf;
39use GDBMUtils;
40
41use strict;
42no strict 'refs'; # allow filehandles to be variables and viceversa
43
# Establish the inheritance chain at compile time: this plugin is a PrintInfo.
BEGIN {
    @ArchivesInfPlugin::ISA = qw(PrintInfo);
}
47
# Arguments understood by this plugin. Only one: the "reversesort" flag,
# which read() consults before loading the archive information file.
my $arguments = [
    {
        'name'    => "reversesort",
        'desc'    => "{ArchivesInfPlugin.reversesort}",
        'type'    => "flag",
        'reqd'    => "no",
        'modegli' => "2",
    },
];

# Description record for the plugin. 'args' refers to the argument list
# above; new() registers both records with the option lists it is handed.
my $options = {
    'name'     => "ArchivesInfPlugin",
    'desc'     => "{ArchivesInfPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
62
# Package-local shorthand for gsprintf::gsprintf.
# Whatever arguments are received are handed on unchanged, and whatever
# gsprintf::gsprintf returns is returned to the caller.
sub gsprintf {
    # '&' form with @_ passed straight through, exactly as callers expect.
    return &gsprintf::gsprintf(@_);
}
67
# Constructor.
#
# Parameters:
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through untouched to PrintInfo's constructor
#   $hashArgOptLists - hash ref; this plugin's argument records are appended
#                      to its "ArgList" entry and its option record to its
#                      "OptList" entry
#
# Returns the object built by PrintInfo's constructor, re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit Class->method call. The indirect-object form
    # ("new PrintInfo(...)") is parsed heuristically, and this package
    # defines its own new(), which makes that form fragile.
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
80
# Called once, at the start of processing.
#
# Parameters:
#   $verbosity  - stored as $self->{'verbosity'}
#   $outhandle  - stored as $self->{'outhandle'}, but only when defined, so
#                 an existing handle is kept if none is supplied
#   $failhandle - always stored as $self->{'failhandle'}, defined or not
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    # verbosity is passed through from the processor
    $self->{'verbosity'} = $verbosity;

    # as are the two output handles
    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }
    $self->{'failhandle'} = $failhandle;
}
94
# Brings the archive information recorded by read() up to date.
#
# Does nothing unless $self->{'archive_info'} is defined. Otherwise every
# entry of the archive's file list is examined by its status flag:
#   "D"        - the entry is deleted from the in-memory archive info and
#                from the database file, and the directory containing the
#                document file is removed from disk
#   "I" or "R" - the entry is re-flagged "B" ("been indexed")
# Entries with any other status are left untouched. Finally the archive
# info is saved back to $self->{'archive_info_filename'}.
sub deinit {
    my ($self) = @_;

    my $archive_info = $self->{'archive_info'};
    my $verbosity = $self->{'verbosity'};
    my $outhandle = $self->{'outhandle'};

    if (defined $archive_info) {
        # fileparse() is called by its fully qualified name below, but this
        # file never loads File::Basename itself; load it here rather than
        # depending on some other module having done so.
        require File::Basename;

        my $archive_info_filename = $self->{'archive_info_filename'};
        my $base_dir = $self->{'base_dir'};

        my $file_list = $archive_info->get_file_list();

        foreach my $subfile (@$file_list) {
            # each entry is [ document file, document OID ]
            my $doc_oid = $subfile->[1];

            my $index_status = $archive_info->get_status_info($doc_oid);

            if ($index_status eq "D") {
                # delete
                $archive_info->delete_info($doc_oid);
                &GDBMUtils::gdbmDatabaseRemove($archive_info_filename, $doc_oid);

                my $doc_file = $subfile->[0];
                my $doc_filename = &util::filename_cat($base_dir, $doc_file);

                # only the directory part is needed; the pattern strips a
                # trailing ".ext" suffix from the tail name
                my ($doc_tailname, $doc_dirname, $suffix)
                    = File::Basename::fileparse($doc_filename, "\\.[^\\.]+\$");

                print $outhandle "Removing $doc_dirname\n" if ($verbosity > 2);

                # NOTE(review): this removes the whole directory holding the
                # document file, so it presumably relies on every document
                # living in its own sub-directory of base_dir -- confirm.
                &util::rm_r($doc_dirname);
            }
            elsif ($index_status =~ m/^(I|R)$/) {
                # mark as "been indexed"
                $archive_info->set_status_info($doc_oid, "B");
            }
        }

        $archive_info->save_info($archive_info_filename);
    }
}
140
# Called at the beginning of each plugin pass (import has one, building
# has many). Remembers the base directory for later use by deinit();
# the remaining arguments are accepted but not used here.
sub begin {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;

    $self->{'base_dir'} = $base_dir;
}
148
# Called at the end of each plugin pass. This plugin has nothing to
# tidy up per pass, so the body is intentionally empty.
sub end {
    my ($self) = shift @_;
}
154
155
# Reports whether this class might recurse using $pluginfo.
# Always 1 for this plugin: read() hands each listed file back to
# plugin::read.
sub is_recursive {
    my ($self) = @_;

    return 1;
}
162
163
# Accepts a $stats argument but records nothing in it: this plugin
# contributes no statistics of its own.
sub compile_stats {
    my $self = shift @_;
    my ($stats) = @_;
}
168
# This plugin does no metadata reading: whatever it is handed, it
# returns undef.
sub metadata_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $maxdocs, $gli) = @_;

    return undef;
}
178
# Blocks the "OIDcount" file and nothing else.
#
# When $file is exactly "OIDcount", its full path (as worked out by
# util::get_full_filenames from $base_dir and $file) is flagged in
# $block_hash->{'file_blocks'} and 1 is returned. For any other file
# nothing is recorded and undef is returned.
sub file_block_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    # we don't do any file blocking for anything but OIDcount
    return undef unless ($file eq "OIDcount");

    my ($full_path, $tail_name) = &util::get_full_filenames($base_dir, $file);
    $block_hash->{'file_blocks'}->{$full_path} = 1;

    return 1;
}
195
196
# Processes an archives directory by handing every document listed in its
# archive information database ("archiveinf-doc.gdb") to plugin::read.
#
# Parameters:
#   $pluginfo, $block_hash, $gli - passed through to plugin::read
#   $base_dir    - may be ""
#   $file        - may include directories
#   $metadata    - not carried on to the next level (an empty hash is
#                  passed instead)
#   $processor   - queried for its mode and for incremental capability;
#                  its mode is switched temporarily for "D"/"R" documents
#   $maxdocs     - -1 for no limit; otherwise processing stops once
#                  $total_count plus the files counted here reaches it
#   $total_count - number of documents already processed by the caller
#
# Returns the number of files processed, or undef if $base_dir/$file does
# not contain the database file (someone else will have to process it).
#
# Side effects: stores the loaded archive info and its filename in
# $self->{'archive_info'} / $self->{'archive_info_filename'} for deinit().
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata,
        $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    # see if this has an archives information file within it
    # (the GDBM database is used; the older "archives.inf" text file is not)
    my $doc_db = "archiveinf-doc.gdb";
    my $archive_info_filename = &util::filename_cat($base_dir, $file, $doc_db);

    # wasn't an archives directory, someone else will have to process it
    return undef unless (-e $archive_info_filename);

    &gsprintf($outhandle, "ArchivesInfPlugin: {common.processing} $archive_info_filename\n")
        if $self->{'verbosity'} > 1;

    # read in the archives information file
    # (explicit Class->method call: the indirect-object form "new arcinfo ()"
    # is parsed heuristically and this package defines its own new())
    my $archive_info = arcinfo->new();
    $self->{'archive_info'} = $archive_info;
    $self->{'archive_info_filename'} = $archive_info_filename;
    $archive_info->reverse_sort() if $self->{'reversesort'};
    $archive_info->load_info($archive_info_filename);

    my $count = 0;
    my $file_list = $archive_info->get_file_list();

    # process each file; each entry is [ document file, document OID ]
    foreach my $subfile (@$file_list) {

        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);

        # skip an entry that would resolve back to $file itself
        my $tmp = &util::filename_cat($file, $subfile->[0]);
        next if $tmp eq $file;

        my $doc_oid = $subfile->[1];
        my $index_status = $archive_info->get_status_info($doc_oid);

        my $curr_mode = $processor->get_mode();
        my $new_mode = $curr_mode;

        # Start by assuming we want to process the file...
        my $process_file = 1;

        # ...unless the build processor is incremental capable and
        # -incremental was specified, in which case the index status
        # decides. NOTE(review): $self->{'incremental'} is not set anywhere
        # in this file -- presumably supplied by the base class; confirm.
        if ($processor->is_incremental_capable() && $self->{'incremental'}) {
            if ($index_status eq "B") {
                # already been indexed: don't process it again
                $process_file = 0;
            }
            elsif ($index_status eq "D") {
                # needs to be deleted from the index
                $new_mode = $curr_mode . "delete";
                $process_file = 1;
            }
            elsif ($index_status eq "R") {
                # needs to be reindexed/replaced
                $new_mode = $curr_mode . "reindex";
                $process_file = 1;
            }
        }
        # ... or we're being asked to delete it (in which case skip it)
        elsif ($index_status eq "D") {
            # Non-incremental delete: it's already been deleted from the
            # archives directory (done during import.pl), so all we need
            # to do here is not process it
            $process_file = 0;
        }

        if (!$processor->is_incremental_capable() && $self->{'incremental'}) {
            # Nag feature: warn once per plugin object, then pause so the
            # message can be read
            if (!defined $self->{'incremental-warning'}) {
                print $outhandle "\n";
                print $outhandle "Warning: command-line option '-incremental' used with *non-incremental*\n";
                print $outhandle "         processor '", ref $processor, "'. Some conflicts may arise.\n";
                print $outhandle "\n";
                sleep 10;
                $self->{'incremental-warning'} = 1;
            }
        }

        next unless $process_file;

        # switch the processor's mode only for the duration of this
        # document, then put it back
        my $mode_changed = ($new_mode ne $curr_mode);
        $processor->set_mode($new_mode) if $mode_changed;

        # note: metadata is not carried on to the next level
        $count += &plugin::read($pluginfo, $base_dir, $tmp, $block_hash, {},
                                $processor, $maxdocs, ($total_count + $count), $gli);

        $processor->set_mode($curr_mode) if $mode_changed;
    }

    return $count;
}
308
3091;
Note: See TracBrowser for help on using the repository browser.