source: main/trunk/greenstone2/perllib/plugins/ArchivesInfPlugin.pm@ 21564

Last change on this file since 21564 was 21564, checked in by mdewsnip, 14 years ago

Changed lots of occurrences of "GDBM" in comments, variable names and function names, where the code isn't GDBM-specific. Part of making the code less GDBM-specific.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.8 KB
Line 
1###########################################################################
2#
3# ArchivesInfPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which reads through an archives.inf (or info database equivalent,
27# archiveinf-doc.gdb file -- i.e. the file generated in the
28# archives directory when an import is done), processing each file it
29# finds
30
31package ArchivesInfPlugin;
32
33use util;
34use doc;
35use PrintInfo;
36use plugin;
37use arcinfo;
38use gsprintf;
39
40use strict;
41no strict 'refs'; # allow filehandles to be variables and viceversa
42
43
# Set up inheritance at compile time: ArchivesInfPlugin is a PrintInfo.
BEGIN {
    @ArchivesInfPlugin::ISA = qw(PrintInfo);
}
47
# Arguments specific to this plugin.  new() appends these to the caller's
# "ArgList".  The 'desc' value is a brace-delimited key rather than literal
# text (presumably looked up in a string table by gsprintf -- confirm).
my $arguments = [
    {
        'name'    => 'reversesort',
        'desc'    => '{ArchivesInfPlugin.reversesort}',
        'type'    => 'flag',
        'reqd'    => 'no',
        'modegli' => '2',
    },
];

# Description of the plugin itself.  new() appends this to the caller's
# "OptList"; 'args' refers to the argument table above.
my $options = {
    'name'     => 'ArchivesInfPlugin',
    'desc'     => '{ArchivesInfPlugin.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
62
# Package-local shorthand for gsprintf::gsprintf: every argument is passed
# through untouched and the callee's result is returned as is.
sub gsprintf {
    return &gsprintf::gsprintf(@_);
}
67
# Constructor.
#
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed straight through to the PrintInfo constructor
#   $hashArgOptLists - hash ref; this plugin's argument table is appended to
#                      its "ArgList" entry and its option description to its
#                      "OptList" entry
#
# Returns the object built by PrintInfo, reblessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit method-call syntax: the indirect-object form
    # ("new PrintInfo(...)") is resolved by parser heuristics, which is
    # fragile in a package that defines its own new().
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
80
# Called once, at the start of processing.
#
#   $verbosity  - passed through from the processor; always stored
#   $outhandle  - stored only when defined, so an existing outhandle
#                 survives an undefined one being passed in
#   $failhandle - always stored, defined or not
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    $self->{'verbosity'} = $verbosity;

    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }

    $self->{'failhandle'} = $failhandle;
}
94
# Called once, at the end of processing.  If read() loaded an archives info
# database, bring it up to date with what has just been done:
#   - documents with status "D" are deleted from the in-memory archive info
#     and from the info database, and the directory holding the document
#     file is removed from disk;
#   - documents with status "I" or "R" are re-flagged "B" ("been indexed").
# The archive info is then saved back under the filename it was loaded from.
# Does nothing when no archive info was loaded.
sub deinit {
    my ($self) = @_;

    my $archive_info = $self->{'archive_info'};
    return unless defined $archive_info;

    # fileparse() is called fully qualified below, but nothing in this file
    # loads File::Basename; load it here rather than relying on some other
    # module having already done so.
    require File::Basename;

    my $verbosity = $self->{'verbosity'};
    my $outhandle = $self->{'outhandle'};
    my $base_dir  = $self->{'base_dir'};

    my $archive_info_filename = $self->{'archive_info_filename'};

    # NOTE(review): dbutil is not loaded by this file either -- presumably
    # it comes in via arcinfo; confirm.
    my $infodb_file_handle = &dbutil::open_infodb_write_handle("gdbm", $archive_info_filename, "append");

    my $file_list = $archive_info->get_file_list();

    foreach my $subfile (@$file_list) {
        my $doc_oid = $subfile->[1];

        my $index_status = $archive_info->get_status_info($doc_oid);

        if ($index_status eq "D") {
            # delete
            $archive_info->delete_info($doc_oid);
            &dbutil::delete_infodb_entry("gdbm", $infodb_file_handle, $doc_oid);

            my $doc_file = $subfile->[0];
            my $doc_filename = &util::filename_cat($base_dir, $doc_file);

            my ($doc_tailname, $doc_dirname, $suffix)
                = File::Basename::fileparse($doc_filename, "\\.[^\\.]+\$");

            print $outhandle "Removing $doc_dirname\n" if ($verbosity>2);

            # NOTE(review): this removes the whole directory containing the
            # document file, so it assumes every document lives in its own
            # subdirectory of base_dir -- confirm.
            &util::rm_r($doc_dirname);
        }
        elsif ($index_status =~ m/^(I|R)$/) {
            # mark as "been indexed"
            $archive_info->set_status_info($doc_oid, "B");
        }
    }

    &dbutil::close_infodb_write_handle("gdbm", $infodb_file_handle);
    $archive_info->save_info($archive_info_filename);
}
142
# Called at the beginning of each plugin pass (import has one, building has
# many).  Only $base_dir is used: it is remembered on the object.
sub begin {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;

    $self->{'base_dir'} = $base_dir;
}
150
# Deliberately empty: this plugin has nothing to do for remove_all.  The
# arguments are unpacked but not used.
sub remove_all {
    my $self = shift;
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
}
156
# Always returns undef: only called during import at this stage, so this
# plugin will never be processing a file.
sub remove_one {
    my ($self, $file, $oids, $archivedir) = @_;

    return undef;
}
163
164
# Called at the end of each plugin pass.  Nothing to do for this plugin.
sub end {
    my ($self) = shift;
}
170
171
# Return 1 if this class might recurse using $pluginfo.  This plugin always
# answers 1 (read() passes the listed files back to plugin::read).
sub is_recursive {
    my ($self) = @_;

    return 1;
}
178
179
# Deliberately empty: this plugin contributes nothing to $stats.
sub compile_stats {
    my $self = shift;
    my ($stats) = @_;
}
184
# We don't do metadata_read: whatever is passed in, the answer is undef.
sub metadata_read {
    my ($self,
        $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $maxdocs, $gli) = @_;

    return undef;
}
194
# File blocking.  The only file this plugin blocks is "OIDcount": its full
# path is recorded under $block_hash->{'file_blocks'} and 1 is returned.
# For any other file nothing is recorded and undef is returned.
sub file_block_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    # we don't do any file blocking for anything other than OIDcount
    return undef unless ($file eq "OIDcount");

    my ($filename_full_path, $filename_no_path)
        = &util::get_full_filenames($base_dir, $file);

    $block_hash->{'file_blocks'}->{$filename_full_path} = 1;

    return 1;
}
211
212
# Process an archives directory: if $base_dir/$file contains an
# archiveinf-doc.gdb info database, load it and pass each file it lists on
# to plugin::read.
#
# Returns the number of files processed, or undef if this can't be processed
# here (no info database found), so that some other plugin can try.
# Note that $base_dir might be "" and that $file might include directories.
#
#   $maxdocs     - upper limit on documents; -1 means no limit
#   $total_count - number of documents already processed before this call
#
# How each listed document is treated depends on its index status:
#   * processor is incremental capable and -incremental was specified:
#       "B"  skipped (already indexed)
#       "D"  processed with the processor mode suffixed with "delete"
#       "R"  processed with the processor mode suffixed with "reindex"
#       anything else is processed in the current mode
#   * otherwise: "D" is skipped, everything else is processed in the
#     current mode.
# The processor's mode is restored after each document that changed it.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    # see if this has a archives information file within it
##  my $archive_info_filename = &util::filename_cat($base_dir,$file,"archives.inf");

    my $doc_db = "archiveinf-doc.gdb";
    my $archive_info_filename = &util::filename_cat($base_dir, $file, $doc_db);

    # wasn't an archives directory, someone else will have to process it
    return undef unless (-e $archive_info_filename);

    # found an archives information file
    if ($self->{'verbosity'} > 1) {
        &gsprintf($outhandle, "ArchivesInfPlugin: {common.processing} $archive_info_filename\n");
    }

    # read in the archives information file; it is kept on the object so
    # that deinit() can update it afterwards
    # (explicit method call rather than indirect-object "new arcinfo ()")
    my $archive_info = arcinfo->new();
    $self->{'archive_info'} = $archive_info;
    $self->{'archive_info_filename'} = $archive_info_filename;

    $archive_info->reverse_sort() if ($self->{'reversesort'});
    $archive_info->load_info($archive_info_filename);

    my $file_list = $archive_info->get_file_list();
    my $count = 0;

    # process each file
    foreach my $subfile (@$file_list) {

        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);

        my $tmp = &util::filename_cat($file, $subfile->[0]);
        next if $tmp eq $file;

        my $doc_oid = $subfile->[1];
        my $index_status = $archive_info->get_status_info($doc_oid);

        my $curr_mode = $processor->get_mode();
        my $new_mode = $curr_mode;

        # Start by assuming we want to process the file...
        my $process_file = 1;

        # ...unless the build processor is incremental capable and
        # -incremental was specified, in which case we need to check its
        # index_status flag
        if ($processor->is_incremental_capable() && $self->{'incremental'}) {
            if ($index_status eq "B") {
                # Don't process this file as it has already been indexed
                $process_file = 0;
            }
            elsif ($index_status eq "D") {
                # Needs to be deleted from the index
                $new_mode = $curr_mode."delete";
            }
            elsif ($index_status eq "R") {
                # Needs to be reindexed/replaced
                $new_mode = $curr_mode."reindex";
            }
        }
        # ... or we're being asked to delete it (in which case skip it)
        elsif ($index_status eq "D") {
            # Non-incremental delete.  It's already been deleted from the
            # archives directory (done during import.pl), so all we need
            # to do here is not process it.
            $process_file = 0;
        }

        # Nag feature: warn (once per plugin object) when -incremental is
        # used with a processor that can't do incremental building
        if (!$processor->is_incremental_capable() && $self->{'incremental'}
            && !defined $self->{'incremental-warning'}) {
            print $outhandle "\n";
            print $outhandle "Warning: command-line option '-incremental' used with *non-incremental*\n";
            print $outhandle " processor '", ref($processor), "'. Some conflicts may arise.\n";
            print $outhandle "\n";
            sleep 10;
            $self->{'incremental-warning'} = 1;
        }

        next unless $process_file;

        # note: metadata is not carried on to the next level

        my $mode_changed = ($new_mode ne $curr_mode);

        $processor->set_mode($new_mode) if $mode_changed;

        $count += &plugin::read($pluginfo, $base_dir, $tmp, $block_hash, {}, $processor, $maxdocs, ($total_count+$count), $gli);

        $processor->set_mode($curr_mode) if $mode_changed;
    }

    return $count;
}
324
3251;
Note: See TracBrowser for help on using the repository browser.