source: gsdl/trunk/perllib/plugins/ArchivesInfPlugin.pm@ 17863

Last change on this file since 17863 was 17738, checked in by kjdon, 16 years ago

AbstractPlugin has been removed, so these now inherit from PrintInfo, and code from AbstractPlugin has been added to these two classes

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.9 KB
RevLine 
###########################################################################
#
# ArchivesInfPlugin.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# plugin which reads through an archives.inf file
# (i.e. the file generated in the archives directory
# when an import is done), processing each file it finds

package ArchivesInfPlugin;

# Modules whose routines are called by this plugin.
use util;
use PrintInfo;
use plugin;
use arcinfo;
use gsprintf;

use strict;
no strict 'refs'; # allow filehandles to be variables and viceversa

BEGIN {
    # Declare PrintInfo as the parent class. BEGIN blocks run as soon as
    # they have been compiled, so @ISA is in place before any method on
    # this package is looked up.
    @ArchivesInfPlugin::ISA = ('PrintInfo');
}

# Argument descriptions specific to this plugin: there are none.
my $arguments = [
];

# Description record for this plugin; new() appends it to the "OptList"
# of the structure it is given.
# NOTE(review): "{ArchivesInfPlugin.desc}" looks like a placeholder that is
# resolved to display text elsewhere - confirm.
my $options = {
    'name'     => "ArchivesInfPlugin",
    'desc'     => "{ArchivesInfPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
};

# Package-local shorthand for gsprintf::gsprintf: every argument is passed
# straight through and the callee's return value is handed back unchanged.
sub gsprintf
{
    # Keep the &-call with @_ so the callee sees exactly what we were given.
    return &gsprintf::gsprintf(@_);
}

# Constructor.
#
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to PrintInfo's constructor
#   $hashArgOptLists - hash ref; this plugin's argument list is appended to
#                      its "ArgList" entry and its option record to "OptList"
#
# Returns the object built by PrintInfo's constructor, blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Arrow call rather than the indirect-object form "new PrintInfo(...)":
    # how the indirect form parses depends on which sub and package names
    # are known at compile time, and this package defines its own new().
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}

# called once, at the start of processing
#
#   $verbosity  - stored as $self->{'verbosity'}
#   $outhandle  - stored as $self->{'outhandle'}, but only when defined, so
#                 an existing outhandle survives an undefined argument
#   $failhandle - always stored as $self->{'failhandle'}, even when undefined
sub init {
    my $self = shift (@_);
    my ($verbosity, $outhandle, $failhandle) = @_;

    # verbosity is passed through from the processor
    $self->{'verbosity'} = $verbosity;

    # as are the outhandle and failhandle
    $self->{'outhandle'} = $outhandle if defined $outhandle;
    $self->{'failhandle'} = $failhandle;

}

# Counterpart of init (presumably called once when processing has finished;
# confirm against the caller).
#
# If read() stored an archives information object on $self, every entry in
# its file list is given status "B" and the information is saved back to
# the file name recorded in $self->{'archive_info_filename'}. Nothing is
# done when no archives information object was stored.
sub deinit {
    my ($self) = @_;

    my $archive_info = $self->{'archive_info'};

    if (defined $archive_info) {
        my $archive_info_filename = $self->{'archive_info_filename'};

        my $file_list = $archive_info->get_file_list();

        # change each file to "Been Indexed"
        foreach my $subfile (@$file_list) {
            # element 1 of each file-list entry is used as the document OID
            my $doc_oid = $subfile->[1];

            # The current status used to be fetched here but was never
            # used, so that lookup has been dropped.
            $archive_info->set_status_info($doc_oid, "B");
        }

        $archive_info->save_info($archive_info_filename);
    }
}

# called at the beginning of each plugin pass (import has one, building has many)
# This plugin has nothing to set up; the arguments are unpacked only to
# document the expected call signature.
sub begin {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

}

# called at the end of each plugin pass
# This plugin has nothing to tear down.
sub end {
    my ($self) = shift (@_);

}

# called if we are doing incremental building
# Stores the given flag in $self->{'incremental'}; read() checks it when
# deciding whether a file may be skipped.
sub set_incremental {
    my $self = shift(@_);
    my ($incremental) = @_;

    $self->{'incremental'} = $incremental;
}

# return 1 if this class might recurse using $pluginfo
# Always 1 here: read() hands every listed file back to plugin::read.
sub is_recursive {
    my $self = shift (@_);

    return 1;
}


# This plugin contributes no statistics: $stats is accepted and left untouched.
sub compile_stats {
    my $self = shift(@_);
    my ($stats) = @_;
}

# We don't do metadata_read
# Always returns undef. The explicit "return undef" is kept (rather than a
# bare return) so the result is unchanged in list context as well.
sub metadata_read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;

    return undef;
}

# we don't do any file blocking
# Always returns undef. The explicit "return undef" is kept (rather than a
# bare return) so the result is unchanged in list context as well.
sub file_block_read {

    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    return undef;
}


# return number of files processed, undef if can't process
# Note that $base_dir might be "" and that $file might
# include directories
#
# Looks for an "archives.inf" file under $base_dir/$file. When one exists it
# is loaded, remembered on $self (for deinit), and every file it lists is
# handed to plugin::read. A maxdocs value of -1 disables the document limit.
# When the processor is incremental capable and incremental building has
# been requested, files whose status is already "B" are skipped.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    my $count = 0;

    # see if this has a archives information file within it
    my $archive_info_filename = &util::filename_cat($base_dir, $file, "archives.inf");

    # wasn't an archives directory, someone else will have to process it
    # (explicit undef kept so that list-context callers see the same value)
    return undef unless -e $archive_info_filename;

    # found an archives.inf file
    &gsprintf($outhandle, "ArchivesInfPlugin: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1;

    # read in the archives information file; arrow call rather than the
    # parse-fragile indirect-object form "new arcinfo ()"
    my $archive_info = arcinfo->new();
    $self->{'archive_info'} = $archive_info;
    $self->{'archive_info_filename'} = $archive_info_filename;

    $archive_info->load_info ($archive_info_filename);

    my $file_list = $archive_info->get_file_list();

    # process each file
    foreach my $subfile (@$file_list) {
        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);

        # an entry that resolves back to $file itself would recurse forever
        my $tmp = &util::filename_cat ($file, $subfile->[0]);
        next if $tmp eq $file;

        # We always process the file unless the build processor is
        # incremental capable and -incremental was specified
        if ($processor->is_incremental_capable() && $self->{'incremental'}) {
            # We don't need to process the file if it has already been built
            my $doc_oid = $subfile->[1];
            my $index_status = $archive_info->get_status_info($doc_oid);
            next if (defined $index_status && $index_status eq "B");
        }

        # note: metadata is not carried on to the next level
        $count += &plugin::read ($pluginfo, $base_dir, $tmp, $block_hash, {}, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count;
}

1;
Note: See TracBrowser for help on using the repository browser.