source: gsdl/trunk/perllib/plugins/ArchivesInfPlugin.pm@ 17739

Last change on this file since 17739 was 17738, checked in by kjdon, 15 years ago

AbstractPlugin has been removed, so these now inherit from PrintInfo, and code from AbstractPlugin has been added to these two classes

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.9 KB
Line 
1###########################################################################
2#
3# ArchivesInfPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which reads through an archives.inf file
27# (i.e. the file generated in the archives directory
28# when an import is done), processing each file it finds
29
30package ArchivesInfPlugin;
31
32use util;
33use PrintInfo;
34use plugin;
35use arcinfo;
36use gsprintf;
37
38use strict;
39no strict 'refs'; # allow filehandles to be variables and viceversa
40
# ArchivesInfPlugin inherits directly from PrintInfo.
BEGIN {
    @ArchivesInfPlugin::ISA = qw(PrintInfo);
}

# This plugin declares no arguments of its own.
my $arguments = [];

# Option description that new() appends to the shared "OptList".
my $options = {
    'name'     => "ArchivesInfPlugin",
    'desc'     => "{ArchivesInfPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
};
52
# Package-local shortcut for gsprintf::gsprintf: every argument is passed
# through untouched and the callee's result is handed back to the caller.
sub gsprintf {
    return &gsprintf::gsprintf(@_);
}
57
# Constructor.
#
# Parameters:
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed straight through to the PrintInfo constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays receive
#                      this plugin's argument and option descriptions
#
# Returns the object built by PrintInfo, blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call rather than indirect object syntax
    # ("new PrintInfo(...)"): this package defines its own new(), so the
    # indirect form relies on the parser's heuristics to be read as a
    # method call.
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
70
# Called once, at the start of processing.
# Records the verbosity level and the output/failure handles on the plugin.
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    # the verbosity level is handed down from the processor
    $self->{'verbosity'} = $verbosity;

    # an outhandle that is already stored is kept when none is supplied ...
    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }

    # ... whereas the failhandle is always overwritten, even with undef
    $self->{'failhandle'} = $failhandle;
}
84
# Finalisation hook. If read() stored an archives information object on
# this plugin, every document listed in it is given the status "B"
# ("Been Indexed") and the information is saved back to the file name that
# read() recorded. Does nothing when no archives information was stored.
#
# NOTE(review): every listed document is marked, including any that read()
# did not reach because of its $maxdocs limit -- confirm this is intended.
sub deinit {
    my ($self) = @_;

    my $archive_info = $self->{'archive_info'};

    if (defined $archive_info) {
        my $archive_info_filename = $self->{'archive_info_filename'};

        my $file_list = $archive_info->get_file_list();

        # change each file to "Been Indexed"; the second element of each
        # entry is the document OID. The previous status was fetched here
        # but never used, so that lookup has been dropped.
        foreach my $subfile (@$file_list) {
            $archive_info->set_status_info($subfile->[1], "B");
        }

        $archive_info->save_info($archive_info_filename);
    }
}
107
# Called at the beginning of each plugin pass (import has one pass,
# building has many). The arguments are unpacked but nothing is done here.
sub begin {
    my $self = shift;
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
}
114
# Called at the end of each plugin pass; there is nothing to do here.
sub end {
    my ($self) = shift;
}
120
# Called if we are doing incremental building.
# Stores the flag on the plugin, where read() consults it.
sub set_incremental {
    my ($self, $incremental) = @_;

    $self->{'incremental'} = $incremental;
}
128
# Returns 1 if this class might recurse using $pluginfo.
# This plugin always answers 1: read() passes each listed file back
# through plugin::read.
sub is_recursive {
    my ($self) = @_;

    return 1;
}
135
136
# Statistics hook: accepts a $stats argument and does nothing with it.
sub compile_stats {
    my $self = shift;
    my ($stats) = @_;
}
141
# We don't do metadata_read: whatever is passed in, the answer is undef.
sub metadata_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;

    return undef;
}
149
# We don't do any file blocking: whatever is passed in, the answer is undef.
sub file_block_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    return undef;
}
158
159
# Looks for an archives.inf file in $base_dir/$file and, when one exists,
# passes every file listed in it on to plugin::read.
#
# Returns the number of files processed, or undef if this plugin can't
# process the location (no archives.inf file there).
# Note that $base_dir might be "" and that $file might include directories.
#
# The loaded archives information and its file name are stored on $self,
# where deinit() picks them up.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    # see if this has an archives information file within it
    my $archive_info_filename = &util::filename_cat($base_dir, $file, "archives.inf");

    # wasn't an archives directory, someone else will have to process it
    return undef unless -e $archive_info_filename;

    # found an archives.inf file
    &gsprintf($outhandle, "ArchivesInfPlugin: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1;

    # read in the archives information file. An explicit class-method call
    # is used rather than "new arcinfo ()": this package defines its own
    # new(), so the indirect form relies on the parser's heuristics.
    my $archive_info = arcinfo->new();
    $self->{'archive_info'} = $archive_info;
    $self->{'archive_info_filename'} = $archive_info_filename;

    $archive_info->load_info($archive_info_filename);

    my $file_list = $archive_info->get_file_list();

    my $count = 0;

    # process each file; an entry holds the file name first and the
    # document OID second
    foreach my $subfile (@$file_list) {
        # stop once the running total reaches $maxdocs (-1 means no limit)
        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);

        my $tmp = &util::filename_cat($file, $subfile->[0]);
        next if $tmp eq $file;

        # We always process the file...
        my $process_file = 1;

        # ...unless the build processor is incremental capable and -incremental was specified
        if ($processor->is_incremental_capable() && $self->{'incremental'}) {
            # We don't need to process the file if it has already been built.
            # A document without any recorded status is still processed.
            my $index_status = $archive_info->get_status_info($subfile->[1]);
            if (defined $index_status && $index_status eq "B") {
                $process_file = 0;
            }
        }

        if ($process_file) {
            # note: metadata is not carried on to the next level
            $count += &plugin::read($pluginfo, $base_dir, $tmp, $block_hash, {}, $processor, $maxdocs, ($total_count + $count), $gli);
        }
    }

    return $count;
}
223
2241;
Note: See TracBrowser for help on using the repository browser.