source: main/trunk/greenstone2/perllib/plugins/ArchivesInfPlugin.pm@ 32122

Last change on this file since 32122 was 31492, checked in by kjdon, 7 years ago

renamed EncodingUtil to CommonUtil, BasePlugin to BaseImporter. The idea is that only top level plugins that you can specify in your collection get to have plugin in their name. Modified all other plugins to reflect these name changes

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.7 KB
Line 
1###########################################################################
2#
3# ArchivesInfPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
# Plugin which reads through an archives.inf file (or its archiveinf-doc info
# database equivalent) -- i.e. the file generated in the archives directory
# when an import is done -- processing each file it finds.
29
30package ArchivesInfPlugin;
31
32use util;
33use FileUtils;
34use doc;
35use CommonUtil;
36use plugin;
37use arcinfo;
38use gsprintf;
39
40use strict;
41no strict 'refs'; # allow filehandles to be variables and viceversa
42
43
# Set up inheritance at compile time: this plugin derives from CommonUtil.
BEGIN {
    @ArchivesInfPlugin::ISA = qw(CommonUtil);
}

# Flags this plugin adds to the argument list. Both are optional flags;
# read() consults them to decide how the archive file list is ordered.
# The 'desc' values look like keys into a message resource rather than
# literal text -- confirm against the gsprintf string tables.
my $arguments = [
    {
        name    => "reversesort",
        desc    => "{ArchivesInfPlugin.reversesort}",
        type    => "flag",
        reqd    => "no",
        modegli => "2",
    },
    {
        name    => "sort",
        desc    => "{ArchivesInfPlugin.sort}",
        type    => "flag",
        reqd    => "no",
        modegli => "2",
    },
];

# Option record for this plugin; new() pushes it onto the caller's "OptList".
my $options = {
    name     => "ArchivesInfPlugin",
    desc     => "{ArchivesInfPlugin.desc}",
    abstract => "no",
    inherits => "yes",
    args     => $arguments,
};
67
# Package-local shorthand for gsprintf::gsprintf: every argument is passed
# through untouched and the callee's return value is handed back as-is.
sub gsprintf {
    return &gsprintf::gsprintf(@_);
}
72
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed straight through to the CommonUtil constructor
#   $hashArgOptLists - hash ref; this plugin's arguments are appended to its
#                      "ArgList" entry and its option record to "OptList"
#
# Returns the object built by CommonUtil, re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call: the indirect-object form "new CommonUtil(...)"
    # relies on parser heuristics, which is fragile in a package that defines
    # its own new().
    my $self = CommonUtil->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
85
# Called once, at the start of processing.
#
# Stores the verbosity level and the output/failure handles on the object.
# An undefined $outhandle leaves any existing 'outhandle' in place, whereas
# 'failhandle' is always overwritten (even with undef).
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    # verbosity is passed through from the processor
    $self->{'verbosity'} = $verbosity;

    # only replace the output handle when the caller actually supplied one
    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }

    $self->{'failhandle'} = $failhandle;
}
99
# Final clean-up, driven by the archive info object that read() stored on
# $self. Does nothing when read() never found an archive info file.
#
# For every document in the archive file list:
#   * status "D": the entry is removed from the arcinfo object and from the
#     info database, and the directory holding the document file is removed
#     from disk;
#   * status "I" or "R": the status is changed to "B" ("been indexed").
# Afterwards the database write handle is closed and the archive info is
# saved back to the file it was loaded from.
sub deinit {
    my ($self) = @_;

    my $archive_info = $self->{'archive_info'};
    my $verbosity = $self->{'verbosity'};
    my $outhandle = $self->{'outhandle'};

    if (defined $archive_info) {
        # fileparse() is called fully qualified below, so make sure the module
        # is loaded rather than relying on some other module having pulled it in.
        require File::Basename;

        # Get the infodbtype value for this collection from the arcinfo object
        my $infodbtype = $archive_info->{'infodbtype'};
        my $archive_info_filename = $self->{'archive_info_filename'};
        my $infodb_file_handle = &dbutil::open_infodb_write_handle($infodbtype, $archive_info_filename, "append");

        my $base_dir = $self->{'base_dir'};
        my $file_list = $archive_info->get_file_list();

        foreach my $subfile (@$file_list) {
            # each entry is [ document file, document OID ]
            my $doc_oid = $subfile->[1];

            my $index_status = $archive_info->get_status_info($doc_oid);

            if ($index_status eq "D") {
                # delete
                $archive_info->delete_info($doc_oid);
                &dbutil::delete_infodb_entry($infodbtype, $infodb_file_handle, $doc_oid);

                my $doc_file = $subfile->[0];
                my $doc_filename = &FileUtils::filenameConcatenate($base_dir, $doc_file);

                # only the directory part of the document path is needed
                my (undef, $doc_dirname)
                    = File::Basename::fileparse($doc_filename, "\\.[^\\.]+\$");

                print $outhandle "Removing $doc_dirname\n" if ($verbosity>2);

                &FileUtils::removeFilesRecursive($doc_dirname);
            }
            elsif ($index_status =~ m/^(I|R)$/) {
                # mark as "been indexed"
                $archive_info->set_status_info($doc_oid,"B");
            }
        }

        &dbutil::close_infodb_write_handle($infodbtype, $infodb_file_handle);
        $archive_info->save_info($archive_info_filename);
    }
}
149
# Called at the beginning of each plugin pass (import has one, building has
# many). Remembers the base directory on the object for later use by deinit();
# the remaining arguments are accepted but not used here.
sub begin {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;

    $self->{'base_dir'} = $base_dir;
}
157
# Hook with the same argument list as begin(); this plugin takes no action:
# the arguments are unpacked and nothing else happens.
sub remove_all {
    my $self = shift @_;
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
}
162
# Per-file removal hook. Always returns undef: as the original note says, this
# is only called during import at this stage, so this plugin will never be the
# one processing the file.
sub remove_one {
    my ($self, $file, $oids, $archivedir) = @_;

    return undef;
}
169
170
# Called at the end of each plugin pass. Nothing to do for this plugin.
sub end {
    my ($self) = shift @_;
}
176
177
# Return 1 if this class might recurse using $pluginfo.
# This plugin does: read() hands each archived document back to plugin::read.
sub is_recursive {
    my ($self) = @_;

    return 1;
}
184
185
# Statistics hook: accepts a $stats argument but records nothing.
sub compile_stats {
    my $self = shift;
    my ($stats) = @_;
}
190
# We don't do metadata_read: the arguments are accepted and ignored, and
# undef is always returned.
sub metadata_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    return undef;
}
200
# File-blocking pass. The only file this plugin blocks is one named exactly
# "OIDcount": its full path is registered through block_raw_filename() and 1
# is returned. For every other file nothing is blocked and undef is returned.
sub file_block_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    # anything other than OIDcount: we don't do any file blocking
    return undef unless ($file eq "OIDcount");

    # get_full_filenames returns (full path, name without path); only the
    # full path is needed here
    my ($oidcount_full_path) = &util::get_full_filenames($base_dir, $file);
    $self->block_raw_filename($block_hash, $oidcount_full_path);

    return 1;
}
217
218
# Return number of files processed, undef if can't process.
# Note that $base_dir might be "" and that $file might include directories.
#
# Only acts at build time (the processor's class name must end in "buildproc",
# case-insensitively); otherwise returns nothing. Looks for the archiveinf-doc
# info database belonging to $base_dir/$file. If it does not exist, returns
# undef so another plugin can try. If it does, the archive info is loaded
# (optionally sorted or reverse-sorted) and remembered on $self for deinit(),
# and each listed document is passed to plugin::read, subject to $maxdocs,
# group position and the document's index status.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    # This function only makes sense at build-time
    return if (ref($processor) !~ /buildproc$/i);

    # Get the infodbtype value for this collection from the buildproc object
    my $infodbtype = $processor->{'infodbtype'};
    $infodbtype = "gdbm" if (defined $infodbtype && $infodbtype eq "gdbm-txtgz");

    # see if this has an archives information file within it
    my $archive_dir = &FileUtils::filenameConcatenate($base_dir, $file);
    my $archive_info_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);

    # wasn't an archives directory, someone else will have to process it
    return undef unless (-e $archive_info_filename);

    # found an archives.inf file
    &gsprintf($outhandle, "ArchivesInfPlugin: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1;

    # read in the archives information file; explicit class-method call
    # instead of the ambiguous indirect-object form "new arcinfo(...)"
    my $archive_info = arcinfo->new($infodbtype);
    $self->{'archive_info'} = $archive_info;
    $self->{'archive_info_filename'} = $archive_info_filename;

    # ordering has to be requested before the info is loaded
    if ($self->{'reversesort'}) {
        $archive_info->reverse_sort();
    } elsif ($self->{'sort'}) {
        $archive_info->sort();
    }

    $archive_info->load_info ($archive_info_filename);

    my $file_list = $archive_info->get_file_list();
    my $count = 0;

    # process each file; every entry is [ document file, document OID ]
    foreach my $subfile (@$file_list) {

        # stop once the document limit is reached (-1 means no limit)
        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);

        # skip entries that resolve to the archives directory itself
        my $tmp = &FileUtils::filenameConcatenate($file, $subfile->[0]);
        next if $tmp eq $file;

        my $doc_oid = $subfile->[1];
        my $index_status = $archive_info->get_status_info($doc_oid);

        my $curr_mode = $processor->get_mode();
        my $new_mode = $curr_mode;
        my $group_position = $archive_info->get_group_position($doc_oid);

        # Start by assuming we want to process the file...
        my $process_file = 1;

        # ... unless we have processed files into a group doc.xml, in which
        # case we only process the xml for the first one
        if (defined $group_position && $group_position > 1) {
            $process_file = 0;
        }
        # ... unless the build processor is incremental capable and
        # -incremental was specified, in which case we need to check its
        # index_status flag
        elsif ($processor->is_incremental_capable() && $self->{'incremental'}) {
            if ($index_status eq "B") {
                # Don't process this file as it has already been indexed
                $process_file = 0;
            }
            elsif ($index_status eq "D") {
                # Needs to be deleted from the index
                $new_mode = $curr_mode."delete";
                $process_file = 1;
            }
            elsif ($index_status eq "R") {
                # Needs to be reindexed/replaced
                $new_mode = $curr_mode."reindex";
                $process_file = 1;
            }
        }
        # ... or we're being asked to delete it (in which case skip it)
        elsif ($index_status eq "D") {
            # Non-incremental Delete
            # It's already been deleted from the archives directory
            # (done during import.pl)
            # => All we need to do here is not process it
            $process_file = 0;
        }

        if (!$processor->is_incremental_capable() && $self->{'incremental'}) {
            # Nag feature: warn (once per plugin object) and pause
            if (!defined $self->{'incremental-warning'}) {
                print $outhandle "\n";
                print $outhandle "Warning: command-line option '-incremental' used with *non-incremental*\n";
                print $outhandle "         processor '", ref $processor, "'. Some conflicts may arise.\n";
                print $outhandle "\n";
                sleep 10;
                $self->{'incremental-warning'} = 1;
            }
        }

        if ($process_file) {
            # note: metadata is not carried on to the next level

            # switch the processor into delete/reindex mode for this document
            # only, and restore the previous mode afterwards
            $processor->set_mode($new_mode) if ($new_mode ne $curr_mode);

            $count += &plugin::read ($pluginfo, $base_dir, $tmp, $block_hash, {}, $processor, $maxdocs, ($total_count+$count), $gli);

            $processor->set_mode($curr_mode) if ($new_mode ne $curr_mode);
        }
    }

    return $count;
}
342
3431;
Note: See TracBrowser for help on using the repository browser.