root/main/trunk/greenstone2/perllib/plugins/ArchivesInfPlugin.pm @ 21614

Revision 21614, 9.1 KB (checked in by mdewsnip, 10 years ago)

Changed ArchivesInfPlugin?.pm so it uses the infodbtype value from the collect.cfg file, instead of always using GDBM. Part of making the code less GDBM-specific.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# ArchivesInfPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which reads through an archives.inf (or archiveinf-doc info database equivalent)
27# -- i.e. the file generated in the archives directory when an import is done),
28# processing each file it finds
29
30package ArchivesInfPlugin;
31
32use util;
33use doc;
34use PrintInfo;
35use plugin;
36use arcinfo;
37use gsprintf;
38
39use strict;
40no strict 'refs'; # allow filehandles to be variables and viceversa
41
42
43BEGIN {
44    @ArchivesInfPlugin::ISA = ('PrintInfo');
45}
46
47my $arguments = [
48      { 'name' => "reversesort",
49    'desc' => "{ArchivesInfPlugin.reversesort}",
50    'type' => "flag",
51    'reqd' => "no",
52    'modegli' => "2" },
53
54         ];
55
56my $options = { 'name'     => "ArchivesInfPlugin",
57        'desc'     => "{ArchivesInfPlugin.desc}",
58        'abstract' => "no",
59        'inherits' => "yes",
60            'args' => $arguments};
61         
62sub gsprintf
63{
64    return &gsprintf::gsprintf(@_);
65}
66
67sub new {
68    my ($class) = shift (@_);
69    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
70    push(@$pluginlist, $class);
71
72    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
73    push(@{$hashArgOptLists->{"OptList"}},$options);
74
75    my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists);
76
77    return bless $self, $class;
78}
79
80# called once, at the start of processing
81sub init {
82    my $self = shift (@_);
83    my ($verbosity, $outhandle, $failhandle) = @_;
84
85    # verbosity is passed through from the processor
86    $self->{'verbosity'} = $verbosity;
87
88    # as are the outhandle and failhandle
89    $self->{'outhandle'} = $outhandle if defined $outhandle;
90    $self->{'failhandle'} = $failhandle;
91
92}
93
94sub deinit {
95    my ($self) = @_;
96
97    my $archive_info = $self->{'archive_info'};
98    my $verbosity = $self->{'verbosity'};
99    my $outhandle = $self->{'outhandle'};
100
101    if (defined $archive_info) {
102        # Get the infodbtype value for this collection from the arcinfo object
103        my $infodbtype = $archive_info->{'infodbtype'};
104    my $archive_info_filename = $self->{'archive_info_filename'};
105    my $infodb_file_handle = &dbutil::open_infodb_write_handle($infodbtype, $archive_info_filename, "append");
106
107        my $file_list = $archive_info->get_file_list();
108
109    foreach my $subfile (@$file_list) {     
110        my $doc_oid = $subfile->[1];
111
112        my $index_status = $archive_info->get_status_info($doc_oid);
113
114        if ($index_status eq "D") {
115        # delete
116        $archive_info->delete_info($doc_oid);
117        &dbutil::delete_infodb_entry($infodbtype, $infodb_file_handle, $doc_oid);
118
119        my $doc_file = $subfile->[0];
120        my $base_dir =$self->{'base_dir'};
121
122        my $doc_filename = &util::filename_cat($base_dir,$doc_file);
123
124        my ($doc_tailname, $doc_dirname, $suffix)
125            = File::Basename::fileparse($doc_filename, "\\.[^\\.]+\$");
126
127        print $outhandle "Removing $doc_dirname\n" if ($verbosity>2);
128
129        &util::rm_r($doc_dirname);
130
131
132        }
133        elsif ($index_status =~ m/^(I|R)$/) {
134        # mark as "been indexed"
135        $archive_info->set_status_info($doc_oid,"B");
136        }
137    }
138
139    &dbutil::close_infodb_write_handle($infodbtype, $infodb_file_handle);
140    $archive_info->save_info($archive_info_filename);
141    }
142}
143
144# called at the beginning of each plugin pass (import has one, buildin has many)
145sub begin {
146    my $self = shift (@_);
147    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
148
149    $self->{'base_dir'} = $base_dir;
150}
151
152sub remove_all {
153    my $self = shift (@_);
154    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
155
156}
157
158sub remove_one {
159    my $self = shift (@_);
160    my ($file, $oids, $archivedir) = @_;
161    return undef; # only called during import at this stage, this will never be processing a file
162   
163}
164
165
166# called at the end of each plugin pass
167sub end {
168    my ($self) = shift (@_);
169
170}
171
172
173# return 1 if this class might recurse using $pluginfo
174sub is_recursive {
175    my $self = shift (@_);
176
177    return 1;
178}
179
180
181sub compile_stats {
182    my $self = shift(@_);
183    my ($stats) = @_;
184}
185
186# We don't do metadata_read
187sub metadata_read {
188    my $self = shift (@_);
189    my ($pluginfo, $base_dir, $file, $block_hash,
190    $extrametakeys, $extrametadata, $extrametafile,
191    $processor, $maxdocs, $gli) = @_;
192
193    return undef;
194}
195
196sub file_block_read {
197
198    my $self = shift (@_); 
199    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;
200
201    if ($file eq "OIDcount") {
202    my ($filename_full_path, $filename_no_path)
203        = &util::get_full_filenames($base_dir, $file);
204    $block_hash->{'file_blocks'}->{$filename_full_path} = 1;
205    return 1;
206    }
207
208    # otherwise, we don't do any file blocking
209
210    return undef;
211}
212
213
214# return number of files processed, undef if can't process
215# Note that $base_dir might be "" and that $file might
216# include directories
217sub read {
218    my $self = shift (@_);
219    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs,$total_count, $gli) = @_;
220    my $outhandle = $self->{'outhandle'};
221
222    my $count = 0;
223
224    # This function only makes sense at build-time
225    return if (ref($processor) !~ /buildproc$/);
226
227    # Get the infodbtype value for this collection from the buildproc object
228    my $infodbtype = $processor->{'infodbtype'};
229
230    # see if this has a archives information file within it
231##    my $archive_info_filename = &util::filename_cat($base_dir,$file,"archives.inf");
232    my $archive_info_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", &util::filename_cat($base_dir, $file));
233
234    if (-e $archive_info_filename) {
235
236    # found an archives.inf file
237    &gsprintf($outhandle, "ArchivesInfPlugin: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1;
238
239    # read in the archives information file
240    my $archive_info = new arcinfo($infodbtype);
241    $self->{'archive_info'} = $archive_info;
242    $self->{'archive_info_filename'} = $archive_info_filename;
243    if ($self->{'reversesort'}) {
244        $archive_info->reverse_sort();
245    }
246   
247    $archive_info->load_info ($archive_info_filename);
248   
249    my $file_list = $archive_info->get_file_list();
250
251    # process each file
252    foreach my $subfile (@$file_list) {
253
254        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);
255
256        my $tmp = &util::filename_cat ($file, $subfile->[0]);
257        next if $tmp eq $file;
258
259        my $doc_oid = $subfile->[1];
260        my $index_status = $archive_info->get_status_info($doc_oid);
261
262        my $curr_mode = $processor->get_mode();
263        my $new_mode = $curr_mode;
264
265        # Start by assuming we want to process the file...
266        my $process_file = 1;
267
268        # ...unless the build processor is incremental capable and -incremental was specified, in which case we need to check its index_status flag
269        if ($processor->is_incremental_capable() && $self->{'incremental'})
270        {
271            # Check to see if the file needs indexing
272        if ($index_status eq "B")
273        {
274            # Don't process this file as it has already been indexed
275            $process_file = 0;
276        }
277        elsif ($index_status eq "D") {
278            # Need to be delete it from the index.
279            $new_mode = $curr_mode."delete";
280            $process_file = 1;
281        }
282        elsif ($index_status eq "R") {
283            # Need to be reindexed/replaced
284            $new_mode = $curr_mode."reindex";
285
286            $process_file = 1;
287        }
288        }
289        # ... or we're being asked to delete it (in which case skip it)
290        elsif ($index_status eq "D") {
291        # Non-incremental Delete
292        # It's already been deleted from the archives directory
293        # (done during import.pl)
294        # => All we need to do here is not process it
295
296        $process_file = 0;
297        }
298
299        if (!$processor->is_incremental_capable() && $self->{'incremental'}) {
300        # Nag feature
301        if (!defined $self->{'incremental-warning'}) {
302            print $outhandle "\n";
303            print $outhandle "Warning: command-line option '-incremental' used with *non-incremental*\n";
304            print $outhandle "         processor '", ref $processor, "'. Some conflicts may arise.\n";
305            print $outhandle "\n";
306            sleep 10;
307            $self->{'incremental-warning'} = 1;
308        }
309        }
310
311        if ($process_file) {
312        # note: metadata is not carried on to the next level
313       
314        $processor->set_mode($new_mode) if ($new_mode ne $curr_mode);
315
316        $count += &plugin::read ($pluginfo, $base_dir, $tmp, $block_hash, {}, $processor, $maxdocs, ($total_count+$count), $gli);
317
318        $processor->set_mode($curr_mode) if ($new_mode ne $curr_mode);
319        }
320    }
321
322    return $count;
323    }
324
325
326    # wasn't an archives directory, someone else will have to process it
327    return undef;
328}
329
3301;
Note: See TracBrowser for help on using the browser.