source: main/trunk/greenstone2/perllib/plugins/ArchivesInfPlugin.pm@ 27306

Last change on this file since 27306 was 27306, checked in by jmt12, 11 years ago

Moving the critical file-related functions (copy, rm, etc) out of util.pm into their own proper class FileUtils. Use of the old functions in util.pm will prompt deprecated warning messages. There may be further functions that could be moved across in the future, but these are the critical ones when considering supporting other filesystems (HTTP, HDFS, WebDav, etc). Updated some key files to use the new functions so now deprecated messages thrown when importing/building demo collection 'out of the box'

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.3 KB
Line 
1###########################################################################
2#
3# ArchivesInfPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which reads through an archives.inf (or the equivalent
27# archiveinf-doc info database), i.e. the file generated in the archives
28# directory when an import is done, processing each file it finds
29
30package ArchivesInfPlugin;
31
32use util;
33use FileUtils;
34use doc;
35use PrintInfo;
36use plugin;
37use arcinfo;
38use gsprintf;
39
40use strict;
41no strict 'refs'; # allow filehandles to be variables and viceversa
42
43
# Set up inheritance at compile time: this plugin derives from PrintInfo.
BEGIN {
    @ArchivesInfPlugin::ISA = qw(PrintInfo);
}
47
# Argument descriptors contributed by this plugin (a single optional flag,
# "reversesort").  new() appends these to the caller's "ArgList".
my $arguments = [
    {
        'name'    => 'reversesort',
        'desc'    => '{ArchivesInfPlugin.reversesort}',
        'type'    => 'flag',
        'reqd'    => 'no',
        'modegli' => '2',
    },
];

# Description record for the plugin itself; it refers back to $arguments.
# new() appends this to the caller's "OptList".
my $options = {
    'name'     => 'ArchivesInfPlugin',
    'desc'     => '{ArchivesInfPlugin.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
62
# Forwarding wrapper: passes every argument, untouched, to
# gsprintf::gsprintf and hands back whatever that call returns.
sub gsprintf {
    return &gsprintf::gsprintf(@_);
}
67
# Constructor.
#   $pluginlist      - array ref; this class's name is appended to it
#   $inputargs       - handed through unchanged to PrintInfo's constructor
#   $hashArgOptLists - hash ref; its "ArgList" array receives the entries of
#                      $arguments and its "OptList" array receives $options
# Returns the object built by PrintInfo, blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Use the explicit Class->method form: this package defines its own
    # new(), so the indirect-object spelling "new PrintInfo(...)" relies on
    # parser heuristics to be read as a method call on PrintInfo.
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
80
# Called once, at the start of processing.  Stores the reporting settings
# passed through from the processor on the plugin object.
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    $self->{'verbosity'} = $verbosity;

    # an existing outhandle is kept when none is supplied ...
    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }

    # ... whereas failhandle is stored exactly as given, defined or not
    $self->{'failhandle'} = $failhandle;
}
94
# Brings the archive information up to date once processing has finished.
#
# Does nothing unless read() stored an arcinfo object in
# $self->{'archive_info'}.  Otherwise, for every entry in its file list:
#   status "D"        - the entry is removed from the arcinfo object and from
#                       the info database, and the directory holding the
#                       document file (under $self->{'base_dir'}) is removed
#   status "I" or "R" - the entry is re-marked "B" ("been indexed")
#   anything else     - left untouched
# Finally the database write handle is closed and the arcinfo object is saved
# back to $self->{'archive_info_filename'}.
sub deinit {
    my ($self) = @_;

    my $archive_info = $self->{'archive_info'};
    my $verbosity = $self->{'verbosity'};
    my $outhandle = $self->{'outhandle'};

    if (defined $archive_info) {
        # fileparse() is called by its fully qualified name below; load the
        # module here rather than relying on some other module having done so
        require File::Basename;

        # Get the infodbtype value for this collection from the arcinfo object
        my $infodbtype = $archive_info->{'infodbtype'};
        my $archive_info_filename = $self->{'archive_info_filename'};
        my $infodb_file_handle = &dbutil::open_infodb_write_handle($infodbtype, $archive_info_filename, "append");

        my $base_dir = $self->{'base_dir'};
        my $file_list = $archive_info->get_file_list();

        foreach my $subfile (@$file_list) {
            my ($doc_file, $doc_oid) = @{$subfile}[0, 1];

            my $index_status = $archive_info->get_status_info($doc_oid);

            # an entry without a status matches neither case below
            next unless defined $index_status;

            if ($index_status eq "D") {
                # delete
                $archive_info->delete_info($doc_oid);
                &dbutil::delete_infodb_entry($infodbtype, $infodb_file_handle, $doc_oid);

                my $doc_filename = &FileUtils::filenameConcatenate($base_dir, $doc_file);

                # only the directory part of the document's path is needed
                my $doc_dirname = (File::Basename::fileparse($doc_filename, "\\.[^\\.]+\$"))[1];

                print $outhandle "Removing $doc_dirname\n" if ($verbosity>2);

                # FileUtils replacement for the deprecated util::rm_r
                &FileUtils::removeFilesRecursive($doc_dirname);
            }
            elsif ($index_status =~ m/^(I|R)$/) {
                # mark as "been indexed"
                $archive_info->set_status_info($doc_oid, "B");
            }
        }

        &dbutil::close_infodb_write_handle($infodbtype, $infodb_file_handle);
        $archive_info->save_info($archive_info_filename);
    }
}
144
# Called at the beginning of each plugin pass (import has one, building has
# many).  Records the base directory on the plugin object.
sub begin {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;

    $self->{'base_dir'} = $base_dir;
}
152
# Nothing to remove for this plugin: the arguments are unpacked and no
# further action is taken.
sub remove_all {
    my $self = shift;
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
}
157
# Always answers undef.
sub remove_one {
    my ($self, $file, $oids, $archivedir) = @_;

    # only called during import at this stage, so this plugin will never be
    # processing a file here
    return undef;
}
164
165
# Called at the end of each plugin pass; this plugin has nothing to do.
sub end {
    my ($self) = shift;
}
171
172
# Reports whether this class might recurse using $pluginfo: always 1.
sub is_recursive {
    my ($self) = @_;

    return 1;
}
179
180
# Accepts a stats argument and does nothing with it.
sub compile_stats {
    my $self = shift;
    my ($stats) = @_;
}
185
# This plugin does not take part in metadata reading: whatever it is given,
# the answer is undef.
sub metadata_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    return undef;
}
195
# Blocks the file named "OIDcount" and nothing else.
# Returns 1 when $file is "OIDcount" (after registering its full path in
# $block_hash via util::block_filename), undef for any other file.
sub file_block_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    # apart from OIDcount, we don't do any file blocking
    return undef unless ($file eq "OIDcount");

    my ($filename_full_path, $filename_no_path)
        = &util::get_full_filenames($base_dir, $file);
    &util::block_filename($block_hash, $filename_full_path);

    return 1;
}
212
213
# Processes an archives directory at build time.
#
# Looks up the "archiveinf-doc" info database for $base_dir/$file and, when it
# exists, loads it into an arcinfo object (kept on $self for deinit()) and
# hands each listed document to plugin::read(), honouring $maxdocs and the
# per-document index status.
#
# Returns the number of files processed; undef if no archive information
# database is found; nothing (bare return) when $processor is not a
# buildproc.  Note that $base_dir might be "" and that $file might include
# directories.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    my $count = 0;

    # This function only makes sense at build-time
    return if (ref($processor) !~ /buildproc$/i);

    # Get the infodbtype value for this collection from the buildproc object
    my $infodbtype = $processor->{'infodbtype'};
    $infodbtype = "gdbm" if $infodbtype eq "gdbm-txtgz";

    # see if this has an archives information database within it
    my $archive_info_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", &FileUtils::filenameConcatenate($base_dir, $file));

    # wasn't an archives directory, someone else will have to process it
    return undef unless (-e $archive_info_filename);

    # found an archives information database
    &gsprintf($outhandle, "ArchivesInfPlugin: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1;

    # read in the archives information file.  The explicit Class->method form
    # is used because this package defines its own new(), which makes the
    # indirect-object spelling "new arcinfo(...)" depend on parser heuristics.
    my $archive_info = arcinfo->new($infodbtype);
    $self->{'archive_info'} = $archive_info;
    $self->{'archive_info_filename'} = $archive_info_filename;
    if ($self->{'reversesort'}) {
        $archive_info->reverse_sort();
    }

    $archive_info->load_info ($archive_info_filename);

    my $file_list = $archive_info->get_file_list();

    # process each file
    foreach my $subfile (@$file_list) {

        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);

        my $tmp = &FileUtils::filenameConcatenate($file, $subfile->[0]);
        next if $tmp eq $file;

        my $doc_oid = $subfile->[1];
        my $index_status = $archive_info->get_status_info($doc_oid);

        my $curr_mode = $processor->get_mode();
        my $new_mode = $curr_mode;

        # Start by assuming we want to process the file...
        my $process_file = 1;

        # asked once per document and reused by both checks below
        my $incremental_capable = $processor->is_incremental_capable();

        # ...unless the build processor is incremental capable and
        # -incremental was specified, in which case we need to check its
        # index_status flag
        if ($incremental_capable && $self->{'incremental'}) {
            if ($index_status eq "B") {
                # Don't process this file as it has already been indexed
                $process_file = 0;
            }
            elsif ($index_status eq "D") {
                # Needs to be deleted from the index
                $new_mode = $curr_mode."delete";
                $process_file = 1;
            }
            elsif ($index_status eq "R") {
                # Needs to be reindexed/replaced
                $new_mode = $curr_mode."reindex";
                $process_file = 1;
            }
        }
        # ... or we're being asked to delete it (in which case skip it)
        elsif ($index_status eq "D") {
            # Non-incremental Delete
            # It's already been deleted from the archives directory
            # (done during import.pl)
            # => All we need to do here is not process it
            $process_file = 0;
        }

        if (!$incremental_capable && $self->{'incremental'}) {
            # Nag feature: warn (once per plugin object) and pause
            if (!defined $self->{'incremental-warning'}) {
                print $outhandle "\n";
                print $outhandle "Warning: command-line option '-incremental' used with *non-incremental*\n";
                print $outhandle "         processor '", ref $processor, "'. Some conflicts may arise.\n";
                print $outhandle "\n";
                sleep 10;
                $self->{'incremental-warning'} = 1;
            }
        }

        if ($process_file) {
            # note: metadata is not carried on to the next level

            # switch the processor's mode for this one document only
            $processor->set_mode($new_mode) if ($new_mode ne $curr_mode);

            $count += &plugin::read ($pluginfo, $base_dir, $tmp, $block_hash, {}, $processor, $maxdocs, ($total_count+$count), $gli);

            $processor->set_mode($curr_mode) if ($new_mode ne $curr_mode);
        }
    }

    return $count;
}
330
3311;
Note: See TracBrowser for help on using the repository browser.