source: main/trunk/greenstone2/perllib/plugins/ArchivesInfPlugin.pm@ 22597

Last change on this file since 22597 was 21614, checked in by mdewsnip, 14 years ago

Changed ArchivesInfPlugin.pm so it uses the infodbtype value from the collect.cfg file, instead of always using GDBM. Part of making the code less GDBM-specific.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.1 KB
RevLine 
[537]1###########################################################################
2#
[16013]3# ArchivesInfPlugin.pm --
[537]4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
[21566]26# plugin which reads through an archives.inf (or archiveinf-doc info database equivalent)
27# -- i.e. the file generated in the archives directory when an import is done),
28# processing each file it finds
[4]29
[15870]30package ArchivesInfPlugin;
[4]31
32use util;
[18528]33use doc;
[17738]34use PrintInfo;
[4]35use plugin;
36use arcinfo;
[5680]37use gsprintf;
[4]38
[10254]39use strict;
40no strict 'refs'; # allow filehandles to be variables and viceversa
41
[21563]42
[4]43BEGIN {
[17738]44 @ArchivesInfPlugin::ISA = ('PrintInfo');
[4]45}
46
[10254]47my $arguments = [
[20758]48 { 'name' => "reversesort",
49 'desc' => "{ArchivesInfPlugin.reversesort}",
50 'type' => "flag",
51 'reqd' => "no",
52 'modegli' => "2" },
53
[10254]54 ];
55
[15870]56my $options = { 'name' => "ArchivesInfPlugin",
57 'desc' => "{ArchivesInfPlugin.desc}",
[6408]58 'abstract' => "no",
[20760]59 'inherits' => "yes",
60 'args' => $arguments};
[10254]61
[5680]62sub gsprintf
63{
64 return &gsprintf::gsprintf(@_);
65}
66
[4]67sub new {
[10218]68 my ($class) = shift (@_);
69 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
70 push(@$pluginlist, $class);
[4]71
[15870]72 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
73 push(@{$hashArgOptLists->{"OptList"}},$options);
[10218]74
[17738]75 my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists);
[10218]76
[4]77 return bless $self, $class;
78}
79
[17738]80# called once, at the start of processing
81sub init {
82 my $self = shift (@_);
83 my ($verbosity, $outhandle, $failhandle) = @_;
84
85 # verbosity is passed through from the processor
86 $self->{'verbosity'} = $verbosity;
87
88 # as are the outhandle and failhandle
89 $self->{'outhandle'} = $outhandle if defined $outhandle;
90 $self->{'failhandle'} = $failhandle;
91
92}
93
[10156]94sub deinit {
95 my ($self) = @_;
96
97 my $archive_info = $self->{'archive_info'};
[18508]98 my $verbosity = $self->{'verbosity'};
99 my $outhandle = $self->{'outhandle'};
[10156]100
101 if (defined $archive_info) {
[21614]102 # Get the infodbtype value for this collection from the arcinfo object
103 my $infodbtype = $archive_info->{'infodbtype'};
[10156]104 my $archive_info_filename = $self->{'archive_info_filename'};
[21614]105 my $infodb_file_handle = &dbutil::open_infodb_write_handle($infodbtype, $archive_info_filename, "append");
[10156]106
107 my $file_list = $archive_info->get_file_list();
108
[18456]109 foreach my $subfile (@$file_list) {
[10156]110 my $doc_oid = $subfile->[1];
[18441]111
[10254]112 my $index_status = $archive_info->get_status_info($doc_oid);
[18456]113
[18441]114 if ($index_status eq "D") {
115 # delete
116 $archive_info->delete_info($doc_oid);
[21614]117 &dbutil::delete_infodb_entry($infodbtype, $infodb_file_handle, $doc_oid);
[18508]118
119 my $doc_file = $subfile->[0];
120 my $base_dir =$self->{'base_dir'};
121
122 my $doc_filename = &util::filename_cat($base_dir,$doc_file);
123
124 my ($doc_tailname, $doc_dirname, $suffix)
125 = File::Basename::fileparse($doc_filename, "\\.[^\\.]+\$");
126
[18509]127 print $outhandle "Removing $doc_dirname\n" if ($verbosity>2);
[18508]128
129 &util::rm_r($doc_dirname);
130
131
[18441]132 }
133 elsif ($index_status =~ m/^(I|R)$/) {
134 # mark as "been indexed"
135 $archive_info->set_status_info($doc_oid,"B");
136 }
[10156]137 }
138
[21614]139 &dbutil::close_infodb_write_handle($infodbtype, $infodb_file_handle);
[10156]140 $archive_info->save_info($archive_info_filename);
141 }
142}
143
[17738]144# called at the beginning of each plugin pass (import has one, buildin has many)
145sub begin {
146 my $self = shift (@_);
147 my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
148
[18508]149 $self->{'base_dir'} = $base_dir;
[17738]150}
151
[21308]152sub remove_all {
[21285]153 my $self = shift (@_);
154 my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
155
156}
[21308]157
158sub remove_one {
159 my $self = shift (@_);
[21315]160 my ($file, $oids, $archivedir) = @_;
161 return undef; # only called during import at this stage, this will never be processing a file
162
[21308]163}
164
165
[17738]166# called at the end of each plugin pass
167sub end {
168 my ($self) = shift (@_);
169
170}
171
172
[4]173# return 1 if this class might recurse using $pluginfo
174sub is_recursive {
175 my $self = shift (@_);
176
177 return 1;
178}
179
[10156]180
[17738]181sub compile_stats {
182 my $self = shift(@_);
183 my ($stats) = @_;
184}
[10156]185
[17738]186# We don't do metadata_read
187sub metadata_read {
188 my $self = shift (@_);
[19493]189 my ($pluginfo, $base_dir, $file, $block_hash,
190 $extrametakeys, $extrametadata, $extrametafile,
191 $processor, $maxdocs, $gli) = @_;
[10156]192
[17738]193 return undef;
194}
195
196sub file_block_read {
197
198 my $self = shift (@_);
199 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;
200
[18528]201 if ($file eq "OIDcount") {
202 my ($filename_full_path, $filename_no_path)
203 = &util::get_full_filenames($base_dir, $file);
204 $block_hash->{'file_blocks'}->{$filename_full_path} = 1;
205 return 1;
206 }
207
208 # otherwise, we don't do any file blocking
209
[17738]210 return undef;
211}
212
213
[317]214# return number of files processed, undef if can't process
[4]215# Note that $base_dir might be "" and that $file might
216# include directories
217sub read {
218 my $self = shift (@_);
[16392]219 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs,$total_count, $gli) = @_;
[1424]220 my $outhandle = $self->{'outhandle'};
[4]221
[317]222 my $count = 0;
223
[21614]224 # This function only makes sense at build-time
225 return if (ref($processor) !~ /buildproc$/);
226
227 # Get the infodbtype value for this collection from the buildproc object
228 my $infodbtype = $processor->{'infodbtype'};
229
[4]230 # see if this has a archives information file within it
[18441]231## my $archive_info_filename = &util::filename_cat($base_dir,$file,"archives.inf");
[21614]232 my $archive_info_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", &util::filename_cat($base_dir, $file));
[18659]233
[4]234 if (-e $archive_info_filename) {
235
[317]236 # found an archives.inf file
[15870]237 &gsprintf($outhandle, "ArchivesInfPlugin: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1;
[317]238
[4]239 # read in the archives information file
[21614]240 my $archive_info = new arcinfo($infodbtype);
[10156]241 $self->{'archive_info'} = $archive_info;
[12397]242 $self->{'archive_info_filename'} = $archive_info_filename;
[20758]243 if ($self->{'reversesort'}) {
244 $archive_info->reverse_sort();
245 }
246
[4]247 $archive_info->load_info ($archive_info_filename);
248
[230]249 my $file_list = $archive_info->get_file_list();
[4]250
251 # process each file
[1244]252 foreach my $subfile (@$file_list) {
[18441]253
[9853]254 last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);
[317]255
[4]256 my $tmp = &util::filename_cat ($file, $subfile->[0]);
257 next if $tmp eq $file;
[18456]258
259 my $doc_oid = $subfile->[1];
260 my $index_status = $archive_info->get_status_info($doc_oid);
261
262 my $curr_mode = $processor->get_mode();
263 my $new_mode = $curr_mode;
264
265 # Start by assuming we want to process the file...
[16257]266 my $process_file = 1;
[10156]267
[18469]268 # ...unless the build processor is incremental capable and -incremental was specified, in which case we need to check its index_status flag
[16257]269 if ($processor->is_incremental_capable() && $self->{'incremental'})
270 {
[18441]271 # Check to see if the file needs indexing
[16257]272 if ($index_status eq "B")
273 {
[18441]274 # Don't process this file as it has already been indexed
[16257]275 $process_file = 0;
[10305]276 }
[18456]277 elsif ($index_status eq "D") {
278 # Need to be delete it from the index.
279 $new_mode = $curr_mode."delete";
280 $process_file = 1;
281 }
282 elsif ($index_status eq "R") {
[18469]283 # Need to be reindexed/replaced
[18456]284 $new_mode = $curr_mode."reindex";
[18469]285
[18456]286 $process_file = 1;
287 }
[10305]288 }
[18456]289 # ... or we're being asked to delete it (in which case skip it)
290 elsif ($index_status eq "D") {
[18469]291 # Non-incremental Delete
292 # It's already been deleted from the archives directory
293 # (done during import.pl)
294 # => All we need to do here is not process it
[10305]295
[18456]296 $process_file = 0;
297 }
298
[18469]299 if (!$processor->is_incremental_capable() && $self->{'incremental'}) {
300 # Nag feature
301 if (!defined $self->{'incremental-warning'}) {
302 print $outhandle "\n";
303 print $outhandle "Warning: command-line option '-incremental' used with *non-incremental*\n";
304 print $outhandle " processor '", ref $processor, "'. Some conflicts may arise.\n";
305 print $outhandle "\n";
306 sleep 10;
307 $self->{'incremental-warning'} = 1;
308 }
309 }
310
[10305]311 if ($process_file) {
[10156]312 # note: metadata is not carried on to the next level
[18456]313
314 $processor->set_mode($new_mode) if ($new_mode ne $curr_mode);
315
[16392]316 $count += &plugin::read ($pluginfo, $base_dir, $tmp, $block_hash, {}, $processor, $maxdocs, ($total_count+$count), $gli);
[18456]317
318 $processor->set_mode($curr_mode) if ($new_mode ne $curr_mode);
[10156]319 }
[4]320 }
321
[317]322 return $count;
[4]323 }
324
[18528]325
[4]326 # wasn't an archives directory, someone else will have to process it
[317]327 return undef;
[4]328}
329
3301;
Note: See TracBrowser for help on using the repository browser.